Model: hector-gr/RLCR-v4-ks-uniqueness-cov0-entropy50-cold-math Source: Original Platform
7368 lines
475 KiB
JSON
7368 lines
475 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.49919376007799904,
|
|
"eval_steps": 50,
|
|
"global_step": 208,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.4786468696997992,
|
|
"calibration/batch_distribution_entropy": 0.27434989424557693,
|
|
"calibration/batch_entropy_100bins": 0.3452116907370852,
|
|
"calibration/batch_entropy_10bins": 0.27434989424557693,
|
|
"calibration/batch_entropy_50bins": 0.40370561408688826,
|
|
"calibration/batch_uniqueness": 0.4969804532848675,
|
|
"calibration/confidence_entropy": 0.215996847848038,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.45861411649511047,
|
|
"calibration/mean_confidence": 0.9140472626196257,
|
|
"calibration/prompt_uniqueness": 0.35674800174725496,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.020225694444444442,
|
|
"completions/max_length": 4034.2,
|
|
"completions/max_terminated_length": 4034.2,
|
|
"completions/mean_length": 518.5538208007813,
|
|
"completions/mean_terminated_length": 529.2614379882813,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.011999850001874977,
|
|
"grad_norm": 0.0034298275131732225,
|
|
"learning_rate": 5.952380952380953e-07,
|
|
"loss": 0.0041,
|
|
"num_tokens": 9087948.0,
|
|
"reward": 0.48353423476219176,
|
|
"reward_std": 0.4472260117530823,
|
|
"rewards/accuracy_reward": 0.25295138359069824,
|
|
"rewards/brier_reward": 0.3069717109203339,
|
|
"rewards/confidence_uniqueness_reward": 0.28508294820785524,
|
|
"rewards/format_reward": 0.5980902671813965,
|
|
"rewards/frontier_aurc_reward": 0.26909309029579165,
|
|
"rewards/frontier_coverage_0": 0.26909309029579165,
|
|
"rewards/frontier_coverage_1": 0.26909309029579165,
|
|
"rewards/frontier_coverage_10": 0.26909309029579165,
|
|
"rewards/frontier_coverage_15": 0.26909309029579165,
|
|
"rewards/frontier_coverage_20": 0.26909309029579165,
|
|
"rewards/frontier_coverage_25": 0.26909309029579165,
|
|
"rewards/frontier_coverage_5": 0.26909309029579165,
|
|
"rewards/frontier_ece_reward": 0.26909309029579165,
|
|
"rewards/frontier_entropy_batch_reward": -0.5501068949699401,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.30129122734069824,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.2361111111111111,
|
|
"signal/accuracy_reward/group_std_mean": 0.3599981427192688,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.11111111268401146,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15064561367034912,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15064561367034912,
|
|
"signal/advantage_abs_mean": 0.38422595858573916,
|
|
"signal/advantage_pre_scale_abs_mean": 0.38422595858573916,
|
|
"signal/advantage_pre_scale_std": 0.4541194498538971,
|
|
"signal/advantage_std": 0.4541194498538971,
|
|
"signal/brier_reward/centered_abs_mean": 0.31531033515930174,
|
|
"signal/brier_reward/group_bin_occupancy": 0.5211805555555555,
|
|
"signal/brier_reward/group_std_mean": 0.36791505217552184,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031531032919883725,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.031531032919883725,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.23501766622066497,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6003472222222223,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2864716470241547,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023501767963171005,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023501767963171005,
|
|
"signal/format_reward/centered_abs_mean": 0.43889973759651185,
|
|
"signal/format_reward/group_bin_occupancy": 0.25,
|
|
"signal/format_reward/group_std_mean": 0.4739928424358368,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.21944986879825593,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.21944986879825593,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.3046343684196472,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.4024305555555555,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.36159105896949767,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.3046343684196472,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.4024305555555555,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.36159105896949767,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.3046343684196472,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.4024305555555555,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.36159105896949767,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3046343684196472,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.4024305555555555,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.36159105896949767,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3046343684196472,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.4024305555555555,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.36159105896949767,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3046343684196472,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.4024305555555555,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.36159105896949767,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3046343684196472,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.4024305555555555,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.36159105896949767,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.3046343684196472,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.4024305555555555,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.36159105896949767,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003807929763570428,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.3046343684196472,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.4024305555555555,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.36159105896949767,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030463438108563425,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030463438108563425,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.45770797729492185,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3309027777777778,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.49182985424995423,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0457707978785038,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0457707978785038,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5215323422297184,
|
|
"calibration/batch_distribution_entropy": 0.27063770046218427,
|
|
"calibration/batch_entropy_100bins": 0.3524338658024668,
|
|
"calibration/batch_entropy_10bins": 0.27063770046218427,
|
|
"calibration/batch_entropy_50bins": 0.41210669269116024,
|
|
"calibration/batch_uniqueness": 0.5186394142692434,
|
|
"calibration/confidence_entropy": 0.22697660378505544,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4744829452486872,
|
|
"calibration/mean_confidence": 0.9174019066918241,
|
|
"calibration/prompt_uniqueness": 0.4043385400555728,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017274305555555557,
|
|
"completions/max_length": 4042.4,
|
|
"completions/max_terminated_length": 4042.4,
|
|
"completions/mean_length": 478.6330810546875,
|
|
"completions/mean_terminated_length": 487.2344909667969,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 19.2,
|
|
"epoch": 0.023999700003749954,
|
|
"grad_norm": 0.020310023799538612,
|
|
"learning_rate": 1.1904761904761906e-06,
|
|
"loss": 0.0026,
|
|
"num_tokens": 17684521.0,
|
|
"reward": 0.5639899730682373,
|
|
"reward_std": 0.4257666528224945,
|
|
"rewards/accuracy_reward": 0.28472222983837125,
|
|
"rewards/brier_reward": 0.34975607991218566,
|
|
"rewards/confidence_uniqueness_reward": 0.35827080607414247,
|
|
"rewards/format_reward": 0.7129340171813965,
|
|
"rewards/frontier_aurc_reward": 0.30107017755508425,
|
|
"rewards/frontier_coverage_0": 0.30107017755508425,
|
|
"rewards/frontier_coverage_1": 0.30107017755508425,
|
|
"rewards/frontier_coverage_10": 0.30107017755508425,
|
|
"rewards/frontier_coverage_15": 0.30107017755508425,
|
|
"rewards/frontier_coverage_20": 0.30107017755508425,
|
|
"rewards/frontier_coverage_25": 0.30107017755508425,
|
|
"rewards/frontier_coverage_5": 0.30107017755508425,
|
|
"rewards/frontier_ece_reward": 0.30107017755508425,
|
|
"rewards/frontier_entropy_batch_reward": -0.6585487723350525,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.31558159589767454,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.24027777777777776,
|
|
"signal/accuracy_reward/group_std_mean": 0.37627485394477844,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.07777777928858995,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15779079794883727,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15779079794883727,
|
|
"signal/advantage_abs_mean": 0.3536957919597626,
|
|
"signal/advantage_pre_scale_abs_mean": 0.3536957919597626,
|
|
"signal/advantage_pre_scale_std": 0.4313755929470062,
|
|
"signal/advantage_std": 0.4313755929470062,
|
|
"signal/brier_reward/centered_abs_mean": 0.31298828125,
|
|
"signal/brier_reward/group_bin_occupancy": 0.545138888888889,
|
|
"signal/brier_reward/group_std_mean": 0.36775757670402526,
|
|
"signal/brier_reward/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03129882961511612,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03129882961511612,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.22144390940666198,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6322916666666667,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.27823981642723083,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022144390642642973,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022144390642642973,
|
|
"signal/format_reward/centered_abs_mean": 0.3566026449203491,
|
|
"signal/format_reward/group_bin_occupancy": 0.25,
|
|
"signal/format_reward/group_std_mean": 0.42138834595680236,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.17830132246017455,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.17830132246017455,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.3110755383968353,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.4229166666666667,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3699175715446472,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.3110755383968353,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.4229166666666667,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3699175715446472,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.3110755383968353,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.4229166666666667,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3699175715446472,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3110755383968353,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.4229166666666667,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3699175715446472,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3110755383968353,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.4229166666666667,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3699175715446472,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3110755383968353,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.4229166666666667,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3699175715446472,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3110755383968353,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.4229166666666667,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3699175715446472,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.3110755383968353,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.4229166666666667,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3699175715446472,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003888444369658828,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.3110755383968353,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.4229166666666667,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3699175715446472,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.031107554957270623,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.031107554957270623,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4007949113845825,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.34236111111111117,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4584290623664856,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04007949084043503,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04007949084043503,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5634525142239435,
|
|
"calibration/batch_distribution_entropy": 0.2936840156784979,
|
|
"calibration/batch_entropy_100bins": 0.3574441819799151,
|
|
"calibration/batch_entropy_10bins": 0.2936840156784979,
|
|
"calibration/batch_entropy_50bins": 0.41566705111670643,
|
|
"calibration/batch_uniqueness": 0.5181815236994417,
|
|
"calibration/confidence_entropy": 0.22663220422136415,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.5354899110079999,
|
|
"calibration/mean_confidence": 0.9149286672818105,
|
|
"calibration/prompt_uniqueness": 0.3991411197867409,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 4018.0,
|
|
"completions/max_terminated_length": 4018.0,
|
|
"completions/mean_length": 430.7875915527344,
|
|
"completions/mean_terminated_length": 435.9453552246094,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 44.0,
|
|
"epoch": 0.03599955000562493,
|
|
"grad_norm": 0.0013605451676994562,
|
|
"learning_rate": 1.7857142857142859e-06,
|
|
"loss": -0.0061,
|
|
"num_tokens": 25749178.0,
|
|
"reward": 0.6798976540565491,
|
|
"reward_std": 0.34733850955963136,
|
|
"rewards/accuracy_reward": 0.3006076455116272,
|
|
"rewards/brier_reward": 0.4006872236728668,
|
|
"rewards/confidence_uniqueness_reward": 0.49280205368995667,
|
|
"rewards/format_reward": 0.9177083253860474,
|
|
"rewards/frontier_aurc_reward": 0.32862133979797364,
|
|
"rewards/frontier_coverage_0": 0.32862133979797364,
|
|
"rewards/frontier_coverage_1": 0.32862133979797364,
|
|
"rewards/frontier_coverage_10": 0.32862133979797364,
|
|
"rewards/frontier_coverage_15": 0.32862133979797364,
|
|
"rewards/frontier_coverage_20": 0.32862133979797364,
|
|
"rewards/frontier_coverage_25": 0.32862133979797364,
|
|
"rewards/frontier_coverage_5": 0.32862133979797364,
|
|
"rewards/frontier_ece_reward": 0.32862133979797364,
|
|
"rewards/frontier_entropy_batch_reward": -0.8433352708816528,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.31458876729011537,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.23888888888888887,
|
|
"signal/accuracy_reward/group_std_mean": 0.3746976673603058,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.0888888917863369,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15729438364505768,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15729438364505768,
|
|
"signal/advantage_abs_mean": 0.2811680108308792,
|
|
"signal/advantage_pre_scale_abs_mean": 0.2811680108308792,
|
|
"signal/advantage_pre_scale_std": 0.3555997729301453,
|
|
"signal/advantage_std": 0.3555997729301453,
|
|
"signal/brier_reward/centered_abs_mean": 0.3005147516727448,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6149305555555555,
|
|
"signal/brier_reward/group_std_mean": 0.35394822955131533,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030051474645733833,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.030051474645733833,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.18455613553524017,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6274305555555556,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.23438866436481476,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018455613404512405,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018455613404512405,
|
|
"signal/format_reward/centered_abs_mean": 0.13573133796453477,
|
|
"signal/format_reward/group_bin_occupancy": 0.22152777777777782,
|
|
"signal/format_reward/group_std_mean": 0.220550999045372,
|
|
"signal/format_reward/group_zero_std_frac": 0.22777777388691903,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.06786566898226738,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.06786566898226738,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.30784491300582884,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.48611111111111105,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.36494665741920473,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.30784491300582884,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.48611111111111105,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.36494665741920473,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.30784491300582884,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.48611111111111105,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.36494665741920473,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.30784491300582884,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.48611111111111105,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.36494665741920473,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.30784491300582884,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.48611111111111105,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.36494665741920473,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.30784491300582884,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.48611111111111105,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.36494665741920473,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.30784491300582884,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.48611111111111105,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.36494665741920473,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.30784491300582884,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.48611111111111105,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.36494665741920473,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038480616174638273,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.30784491300582884,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.48611111111111105,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.36494665741920473,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03078449293971062,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03078449293971062,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24438310861587526,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.36006944444444444,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3511778712272644,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.02500000037252903,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02443831190466881,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02443831190466881,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.49393609341404227,
|
|
"calibration/batch_distribution_entropy": 0.3950132648234722,
|
|
"calibration/batch_entropy_100bins": 0.40505684224566296,
|
|
"calibration/batch_entropy_10bins": 0.3950132648234722,
|
|
"calibration/batch_entropy_50bins": 0.4693520387698801,
|
|
"calibration/batch_uniqueness": 0.61509530787946,
|
|
"calibration/buffer_distribution_entropy": 0.3091847349375323,
|
|
"calibration/buffer_entropy_100bins": 0.37384274971807285,
|
|
"calibration/buffer_entropy_10bins": 0.3091847349375323,
|
|
"calibration/buffer_entropy_50bins": 0.4345021124322783,
|
|
"calibration/confidence_entropy": 0.2933222613376684,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.029023746701846966,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4258668636956767,
|
|
"calibration/mean_confidence": 0.8878107373812835,
|
|
"calibration/prompt_uniqueness": 0.517297995778335,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010850694444444442,
|
|
"completions/max_length": 3979.6,
|
|
"completions/max_terminated_length": 3979.6,
|
|
"completions/mean_length": 432.4179748535156,
|
|
"completions/mean_terminated_length": 437.20750732421874,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 68.8,
|
|
"epoch": 0.04799940000749991,
|
|
"grad_norm": 0.0028677526861429214,
|
|
"learning_rate": 2.380952380952381e-06,
|
|
"loss": -0.0086,
|
|
"num_tokens": 33844329.0,
|
|
"reward": 0.7569576263427734,
|
|
"reward_std": 0.2728476107120514,
|
|
"rewards/accuracy_reward": 0.41449652910232543,
|
|
"rewards/brier_reward": 0.5286458790302276,
|
|
"rewards/confidence_uniqueness_reward": 0.606674587726593,
|
|
"rewards/format_reward": 0.9817708253860473,
|
|
"rewards/frontier_aurc_reward": 0.17920130817219615,
|
|
"rewards/frontier_coverage_0": 0.1886154913343489,
|
|
"rewards/frontier_coverage_1": 0.1886154913343489,
|
|
"rewards/frontier_coverage_10": 0.1886154913343489,
|
|
"rewards/frontier_coverage_15": 0.1886154913343489,
|
|
"rewards/frontier_coverage_20": 0.1886154913343489,
|
|
"rewards/frontier_coverage_25": 0.1886154913343489,
|
|
"rewards/frontier_coverage_5": 0.1886154913343489,
|
|
"rewards/frontier_ece_reward": 0.1644112183363177,
|
|
"rewards/frontier_entropy_batch_reward": -0.8989308953285218,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2998209595680237,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.24131944444444448,
|
|
"signal/accuracy_reward/group_std_mean": 0.3666124284267426,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.06944444701075554,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14991047978401184,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14991047978401184,
|
|
"signal/advantage_abs_mean": 0.2194227993488312,
|
|
"signal/advantage_pre_scale_abs_mean": 0.2194227993488312,
|
|
"signal/advantage_pre_scale_std": 0.28129519820213317,
|
|
"signal/advantage_std": 0.28129519820213317,
|
|
"signal/brier_reward/centered_abs_mean": 0.26685882806777955,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6607638888888889,
|
|
"signal/brier_reward/group_std_mean": 0.32316548824310304,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02668588310480118,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02668588310480118,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.15834780037403107,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6138888888888888,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.19463339745998381,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015834780223667622,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015834780223667622,
|
|
"signal/format_reward/centered_abs_mean": 0.03338758684694767,
|
|
"signal/format_reward/group_bin_occupancy": 0.16562499999999997,
|
|
"signal/format_reward/group_std_mean": 0.07192002534866333,
|
|
"signal/format_reward/group_zero_std_frac": 0.675000011920929,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016693793423473834,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.016693793423473834,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.12922168229706585,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6350694444444445,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.1566169561818242,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.001615271106857108,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.001615271106857108,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1444099996238947,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.6125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18344281539320945,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018051250837743283,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018051250837743283,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1444099996238947,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.6125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18344281539320945,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018051250837743283,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018051250837743283,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1444099996238947,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.6125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18344281539320945,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018051250837743283,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018051250837743283,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1444099996238947,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.6125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18344281539320945,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018051250837743283,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018051250837743283,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1444099996238947,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.6125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.18344281539320945,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018051250837743283,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018051250837743283,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1444099996238947,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.6125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.18344281539320945,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018051250837743283,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018051250837743283,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1444099996238947,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.6125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18344281539320945,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018051250837743283,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018051250837743283,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.22200666069984437,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.4690972222222222,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.26992476880550387,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02220066711306572,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02220066711306572,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.16805205643177032,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3486111111111111,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2819783270359039,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.09166666865348816,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01680520586669445,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01680520586669445,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.40831880456732605,
|
|
"calibration/batch_distribution_entropy": 0.6097885070554001,
|
|
"calibration/batch_entropy_100bins": 0.48593398604284915,
|
|
"calibration/batch_entropy_10bins": 0.6097885070554001,
|
|
"calibration/batch_entropy_50bins": 0.5663091534135619,
|
|
"calibration/batch_uniqueness": 0.7425439382867337,
|
|
"calibration/buffer_distribution_entropy": 0.36603622224810134,
|
|
"calibration/buffer_entropy_100bins": 0.39999389908282235,
|
|
"calibration/buffer_entropy_10bins": 0.36603622224810134,
|
|
"calibration/buffer_entropy_50bins": 0.4646358566837005,
|
|
"calibration/confidence_entropy": 0.39686215013611503,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.07571801566579635,
|
|
"calibration/coverage@30%": 0.2670907759680606,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.2710533311020483,
|
|
"calibration/mean_confidence": 0.8242252010281881,
|
|
"calibration/prompt_uniqueness": 0.6616427423448029,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00980902777777779,
|
|
"completions/max_length": 3722.6,
|
|
"completions/max_terminated_length": 3722.6,
|
|
"completions/mean_length": 459.39210205078126,
|
|
"completions/mean_terminated_length": 463.9351379394531,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 102.2,
|
|
"epoch": 0.05999925000937488,
|
|
"grad_norm": 0.0009269694564864039,
|
|
"learning_rate": 2.9761904761904763e-06,
|
|
"loss": -0.0064,
|
|
"num_tokens": 42260974.0,
|
|
"reward": 0.7958746194839478,
|
|
"reward_std": 0.2274015724658966,
|
|
"rewards/accuracy_reward": 0.5085069417953492,
|
|
"rewards/brier_reward": 0.6438981890678406,
|
|
"rewards/confidence_uniqueness_reward": 0.734617817401886,
|
|
"rewards/format_reward": 0.9876736283302308,
|
|
"rewards/frontier_aurc_reward": -0.004628232168033719,
|
|
"rewards/frontier_coverage_0": 0.005646060802973807,
|
|
"rewards/frontier_coverage_1": 0.005646060802973807,
|
|
"rewards/frontier_coverage_10": 0.005646060802973807,
|
|
"rewards/frontier_coverage_15": 0.005646060802973807,
|
|
"rewards/frontier_coverage_20": 0.005646060802973807,
|
|
"rewards/frontier_coverage_25": 0.005646060802973807,
|
|
"rewards/frontier_coverage_5": 0.005646060802973807,
|
|
"rewards/frontier_ece_reward": 0.007948444318026304,
|
|
"rewards/frontier_entropy_batch_reward": -0.9129829168319702,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.28665364980697633,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.23819444444444446,
|
|
"signal/accuracy_reward/group_std_mean": 0.3534803450107574,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.09444444552063942,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14332682490348816,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14332682490348816,
|
|
"signal/advantage_abs_mean": 0.18016738891601564,
|
|
"signal/advantage_pre_scale_abs_mean": 0.18016738891601564,
|
|
"signal/advantage_pre_scale_std": 0.238165420293808,
|
|
"signal/advantage_std": 0.238165420293808,
|
|
"signal/brier_reward/centered_abs_mean": 0.21931754648685456,
|
|
"signal/brier_reward/group_bin_occupancy": 0.7795138888888888,
|
|
"signal/brier_reward/group_std_mean": 0.27221688628196716,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021931754797697066,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021931754797697066,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09854339063167572,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6871527777777777,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.12727494537830353,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009854339342564345,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009854339342564345,
|
|
"signal/format_reward/centered_abs_mean": 0.022319878824055193,
|
|
"signal/format_reward/group_bin_occupancy": 0.15138888888888888,
|
|
"signal/format_reward/group_std_mean": 0.047178071737289426,
|
|
"signal/format_reward/group_zero_std_frac": 0.7888888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011159939412027597,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.011159939412027597,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002872100844979286,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7059027777777778,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004377482458949089,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5901259980164466e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5901259980164466e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.0523149847984314,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.7753472222222222,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.08000584244728089,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00065393730183132,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00065393730183132,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0523149847984314,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.7753472222222222,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.08000584244728089,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00065393730183132,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00065393730183132,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0523149847984314,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.7753472222222222,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.08000584244728089,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00065393730183132,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00065393730183132,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0523149847984314,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.7753472222222222,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08000584244728089,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00065393730183132,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00065393730183132,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0523149847984314,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.7753472222222222,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08000584244728089,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00065393730183132,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00065393730183132,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0523149847984314,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.7753472222222222,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08000584244728089,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00065393730183132,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00065393730183132,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0523149847984314,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.7753472222222222,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.08000584244728089,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00065393730183132,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00065393730183132,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.13553643673658372,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6496527777777777,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.16666682958602905,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013553644344210625,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013553644344210625,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1483454465866089,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.30590277777777775,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.26824913918972015,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1527777798473835,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0148345448076725,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0148345448076725,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31340018732890096,
|
|
"calibration/batch_distribution_entropy": 0.7048907327472749,
|
|
"calibration/batch_entropy_100bins": 0.5451866323540866,
|
|
"calibration/batch_entropy_10bins": 0.7048907327472749,
|
|
"calibration/batch_entropy_50bins": 0.6235172362071362,
|
|
"calibration/batch_uniqueness": 0.7777645419569349,
|
|
"calibration/buffer_distribution_entropy": 0.4720834768392196,
|
|
"calibration/buffer_entropy_100bins": 0.4512809972000536,
|
|
"calibration/buffer_entropy_10bins": 0.4720834768392196,
|
|
"calibration/buffer_entropy_50bins": 0.5236436414920623,
|
|
"calibration/confidence_entropy": 0.5383263280850088,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.060188392145496705,
|
|
"calibration/coverage@25%": 0.17894412155077963,
|
|
"calibration/coverage@30%": 0.3775832099984736,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.12798357790902398,
|
|
"calibration/mean_confidence": 0.7155363782263905,
|
|
"calibration/prompt_uniqueness": 0.7025420849805335,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01553819444444442,
|
|
"completions/max_length": 3862.6,
|
|
"completions/max_terminated_length": 3862.6,
|
|
"completions/mean_length": 536.9459350585937,
|
|
"completions/mean_terminated_length": 545.4788208007812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 122.2,
|
|
"epoch": 0.07199910001124986,
|
|
"grad_norm": 0.0006166549865156412,
|
|
"learning_rate": 3.5714285714285718e-06,
|
|
"loss": -0.0102,
|
|
"num_tokens": 51556511.0,
|
|
"reward": 0.8471167922019959,
|
|
"reward_std": 0.19835625290870668,
|
|
"rewards/accuracy_reward": 0.5782118082046509,
|
|
"rewards/brier_reward": 0.7210497856140137,
|
|
"rewards/confidence_uniqueness_reward": 0.7651053071022034,
|
|
"rewards/format_reward": 0.9834201216697693,
|
|
"rewards/frontier_aurc_reward": -0.003613748401403427,
|
|
"rewards/frontier_coverage_0": -0.00830224696546793,
|
|
"rewards/frontier_coverage_1": -0.00830224696546793,
|
|
"rewards/frontier_coverage_10": -0.00830224696546793,
|
|
"rewards/frontier_coverage_15": -0.00830224696546793,
|
|
"rewards/frontier_coverage_20": -0.00830224696546793,
|
|
"rewards/frontier_coverage_25": -0.00830224696546793,
|
|
"rewards/frontier_coverage_5": -0.00830224696546793,
|
|
"rewards/frontier_ece_reward": 0.023493098840117455,
|
|
"rewards/frontier_entropy_batch_reward": -0.838923704624176,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.24400499165058137,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.22743055555555552,
|
|
"signal/accuracy_reward/group_std_mean": 0.30676281452178955,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.18055555820465088,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12200249582529069,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.12200249582529069,
|
|
"signal/advantage_abs_mean": 0.15304453670978546,
|
|
"signal/advantage_pre_scale_abs_mean": 0.15304453670978546,
|
|
"signal/advantage_pre_scale_std": 0.2149382084608078,
|
|
"signal/advantage_std": 0.2149382084608078,
|
|
"signal/brier_reward/centered_abs_mean": 0.15970109701156615,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8368055555555556,
|
|
"signal/brier_reward/group_std_mean": 0.20384239852428437,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015970110520720483,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015970110520720483,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11436907052993775,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.689236111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.14522747993469237,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011436907574534416,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011436907574534416,
|
|
"signal/format_reward/centered_abs_mean": 0.02875976599752903,
|
|
"signal/format_reward/group_bin_occupancy": 0.15590277777777778,
|
|
"signal/format_reward/group_std_mean": 0.057394811511039735,
|
|
"signal/format_reward/group_zero_std_frac": 0.7527777791023255,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014379882998764516,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.014379882998764516,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016801425954326987,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002739586587995291,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1001783170504496e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1001783170504496e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.0830587849020958,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8541666666666666,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.11073667109012604,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001038234820589423,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001038234820589423,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.0830587849020958,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8541666666666666,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.11073667109012604,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001038234820589423,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001038234820589423,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.0830587849020958,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8541666666666666,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.11073667109012604,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001038234820589423,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001038234820589423,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0830587849020958,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8541666666666666,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11073667109012604,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001038234820589423,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001038234820589423,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0830587849020958,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8541666666666666,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11073667109012604,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001038234820589423,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001038234820589423,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0830587849020958,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8541666666666666,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11073667109012604,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001038234820589423,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001038234820589423,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.0830587849020958,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8541666666666666,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.11073667109012604,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001038234820589423,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001038234820589423,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.08026944175362587,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7819444444444444,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.10268646031618119,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008026944752782584,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008026944752782584,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2555039495229721,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.41215277777777787,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38815844655036924,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.03888888955116272,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025550395622849463,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025550395622849463,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25956437104203445,
|
|
"calibration/batch_distribution_entropy": 0.8165243972425698,
|
|
"calibration/batch_entropy_100bins": 0.8075646239662813,
|
|
"calibration/batch_entropy_10bins": 0.8165243972425698,
|
|
"calibration/batch_entropy_50bins": 0.8399069478240373,
|
|
"calibration/batch_uniqueness": 0.9168564222845603,
|
|
"calibration/buffer_distribution_entropy": 0.5710273729435295,
|
|
"calibration/buffer_entropy_100bins": 0.5281526593776766,
|
|
"calibration/buffer_entropy_10bins": 0.5710273729435295,
|
|
"calibration/buffer_entropy_50bins": 0.601999214873284,
|
|
"calibration/confidence_entropy": 0.5454837996401404,
|
|
"calibration/coverage@0%": 0.0020887728459530026,
|
|
"calibration/coverage@1%": 0.0020887728459530026,
|
|
"calibration/coverage@10%": 0.06197192161099606,
|
|
"calibration/coverage@15%": 0.11850014659270083,
|
|
"calibration/coverage@20%": 0.3127652813079301,
|
|
"calibration/coverage@25%": 0.5564344484546024,
|
|
"calibration/coverage@30%": 0.6621659907782901,
|
|
"calibration/coverage@5%": 0.02454308093994778,
|
|
"calibration/ece": 0.11688668618444734,
|
|
"calibration/mean_confidence": 0.670672226113574,
|
|
"calibration/prompt_uniqueness": 0.8470008776094577,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.020486111111111115,
|
|
"completions/max_length": 3877.4,
|
|
"completions/max_terminated_length": 3877.4,
|
|
"completions/mean_length": 578.4817749023438,
|
|
"completions/mean_terminated_length": 590.725341796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 146.4,
|
|
"epoch": 0.08399895001312484,
|
|
"grad_norm": 0.0005679262103512883,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": -0.0128,
|
|
"num_tokens": 61298061.0,
|
|
"reward": 0.9099455237388611,
|
|
"reward_std": 0.1839153379201889,
|
|
"rewards/accuracy_reward": 0.61875,
|
|
"rewards/brier_reward": 0.7439757466316224,
|
|
"rewards/confidence_uniqueness_reward": 0.8963147759437561,
|
|
"rewards/format_reward": 0.9782118082046509,
|
|
"rewards/frontier_aurc_reward": -0.0029043381568044425,
|
|
"rewards/frontier_coverage_0": -0.01887217308394611,
|
|
"rewards/frontier_coverage_1": -0.01887217308394611,
|
|
"rewards/frontier_coverage_10": -0.01887217308394611,
|
|
"rewards/frontier_coverage_15": -0.01887217308394611,
|
|
"rewards/frontier_coverage_20": -0.01887217308394611,
|
|
"rewards/frontier_coverage_25": -0.01887217308394611,
|
|
"rewards/frontier_coverage_5": -0.01887217308394611,
|
|
"rewards/frontier_ece_reward": 0.02471662126481533,
|
|
"rewards/frontier_entropy_batch_reward": -0.5334846794605255,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.21307508647441864,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21701388888888892,
|
|
"signal/accuracy_reward/group_std_mean": 0.27209571599960325,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2638888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10653754323720932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10653754323720932,
|
|
"signal/advantage_abs_mean": 0.14091622233390808,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14091622233390808,
|
|
"signal/advantage_pre_scale_std": 0.20204322636127472,
|
|
"signal/advantage_std": 0.20204322636127472,
|
|
"signal/brier_reward/centered_abs_mean": 0.15218718945980073,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8690972222222222,
|
|
"signal/brier_reward/group_std_mean": 0.19625934958457947,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015218720026314258,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015218720026314258,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06924531385302543,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7503472222222223,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09717238694429398,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0069245313294231895,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0069245313294231895,
|
|
"signal/format_reward/centered_abs_mean": 0.03473849855363369,
|
|
"signal/format_reward/group_bin_occupancy": 0.15416666666666665,
|
|
"signal/format_reward/group_std_mean": 0.060436099767684937,
|
|
"signal/format_reward/group_zero_std_frac": 0.7666666626930236,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.017369249276816844,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.017369249276816844,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016358074499294162,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7302083333333333,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002670387364923954,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.044759276031982e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.044759276031982e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.12218185663223266,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8847222222222223,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.16369088590145112,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001527273189276457,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001527273189276457,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12218185663223266,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8847222222222223,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16369088590145112,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001527273189276457,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001527273189276457,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12218185663223266,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8847222222222223,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16369088590145112,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001527273189276457,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001527273189276457,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12218185663223266,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8847222222222223,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16369088590145112,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001527273189276457,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001527273189276457,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.12218185663223266,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8847222222222223,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16369088590145112,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001527273189276457,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001527273189276457,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12218185663223266,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8847222222222223,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16369088590145112,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001527273189276457,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001527273189276457,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12218185663223266,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8847222222222223,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16369088590145112,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001527273189276457,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001527273189276457,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06000325083732605,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6788194444444444,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08099779933691025,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006000325083732605,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006000325083732605,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4390486657619476,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7121527777777776,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.500614058971405,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04390486851334572,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04390486851334572,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30461219130156075,
|
|
"calibration/batch_distribution_entropy": 0.97860829400174,
|
|
"calibration/batch_entropy_100bins": 0.9486484674822325,
|
|
"calibration/batch_entropy_10bins": 0.97860829400174,
|
|
"calibration/batch_entropy_50bins": 0.9682282190043502,
|
|
"calibration/batch_uniqueness": 0.9518603939567309,
|
|
"calibration/buffer_distribution_entropy": 0.6674029515556968,
|
|
"calibration/buffer_entropy_100bins": 0.6329905407674977,
|
|
"calibration/buffer_entropy_10bins": 0.6674029515556968,
|
|
"calibration/buffer_entropy_50bins": 0.696431956314964,
|
|
"calibration/confidence_entropy": 0.5217791211913715,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.06248734222348997,
|
|
"calibration/coverage@20%": 0.11387957878723051,
|
|
"calibration/coverage@25%": 0.34223374312873,
|
|
"calibration/coverage@30%": 0.5964517660517661,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.21549805876997757,
|
|
"calibration/mean_confidence": 0.5352754370110747,
|
|
"calibration/prompt_uniqueness": 0.8878912054727662,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.020572916666666653,
|
|
"completions/max_length": 3790.6,
|
|
"completions/max_terminated_length": 3790.6,
|
|
"completions/mean_length": 589.0677978515625,
|
|
"completions/mean_terminated_length": 601.5003784179687,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 152.4,
|
|
"epoch": 0.09599880001499982,
|
|
"grad_norm": 0.0004772288375534117,
|
|
"learning_rate": 4.761904761904762e-06,
|
|
"loss": -0.0183,
|
|
"num_tokens": 71203642.0,
|
|
"reward": 0.9513100028038025,
|
|
"reward_std": 0.17211353182792663,
|
|
"rewards/accuracy_reward": 0.6349826335906983,
|
|
"rewards/brier_reward": 0.7103789806365967,
|
|
"rewards/confidence_uniqueness_reward": 0.9307293176651001,
|
|
"rewards/format_reward": 0.97734375,
|
|
"rewards/frontier_aurc_reward": -0.002656676573678851,
|
|
"rewards/frontier_coverage_0": -0.04805164374411106,
|
|
"rewards/frontier_coverage_1": -0.04805164374411106,
|
|
"rewards/frontier_coverage_10": -0.04805164374411106,
|
|
"rewards/frontier_coverage_15": -0.04805164374411106,
|
|
"rewards/frontier_coverage_20": -0.04805164374411106,
|
|
"rewards/frontier_coverage_25": -0.04805164374411106,
|
|
"rewards/frontier_coverage_5": -0.04805164374411106,
|
|
"rewards/frontier_ece_reward": 0.015457052178680897,
|
|
"rewards/frontier_entropy_batch_reward": -0.16271998584270478,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19670681655406952,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21631944444444445,
|
|
"signal/accuracy_reward/group_std_mean": 0.2593521386384964,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2694444447755814,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09835340827703476,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09835340827703476,
|
|
"signal/advantage_abs_mean": 0.12720865309238433,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12720865309238433,
|
|
"signal/advantage_pre_scale_std": 0.19325639307498932,
|
|
"signal/advantage_std": 0.19325639307498932,
|
|
"signal/brier_reward/centered_abs_mean": 0.21643259525299072,
|
|
"signal/brier_reward/group_bin_occupancy": 0.9309027777777779,
|
|
"signal/brier_reward/group_std_mean": 0.2640606015920639,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02164325937628746,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02164325937628746,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.047277380526065824,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7392361111111112,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07938017547130585,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004727738164365292,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004727738164365292,
|
|
"signal/format_reward/centered_abs_mean": 0.03805881068110466,
|
|
"signal/format_reward/group_bin_occupancy": 0.15902777777777777,
|
|
"signal/format_reward/group_std_mean": 0.06856417283415794,
|
|
"signal/format_reward/group_zero_std_frac": 0.7277777791023254,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01902940534055233,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01902940534055233,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017690456472337246,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6746527777777778,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00304986541159451,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.211307037214283e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.211307037214283e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.23642539083957673,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.9170138888888889,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3045207381248474,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002955317497253418,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002955317497253418,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.23642539083957673,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.9170138888888889,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3045207381248474,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002955317497253418,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002955317497253418,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23642539083957673,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.9170138888888889,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3045207381248474,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002955317497253418,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002955317497253418,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.23642539083957673,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.9170138888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3045207381248474,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002955317497253418,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002955317497253418,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.23642539083957673,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9170138888888889,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3045207381248474,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002955317497253418,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002955317497253418,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.23642539083957673,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9170138888888889,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3045207381248474,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002955317497253418,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002955317497253418,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.23642539083957673,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.9170138888888889,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3045207381248474,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002955317497253418,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002955317497253418,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06959517598152161,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7576388888888889,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09464571475982667,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006959517952054739,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006959517952054739,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2507960021495819,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8145833333333332,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32827151417732237,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025079600140452386,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025079600140452386,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2344727023359694,
|
|
"calibration/batch_distribution_entropy": 0.9518525106541613,
|
|
"calibration/batch_entropy_100bins": 0.9412130004420993,
|
|
"calibration/batch_entropy_10bins": 0.9518525106541613,
|
|
"calibration/batch_entropy_50bins": 0.9527945423184636,
|
|
"calibration/batch_uniqueness": 0.9461262919998014,
|
|
"calibration/buffer_distribution_entropy": 0.7459719739155918,
|
|
"calibration/buffer_entropy_100bins": 0.7118780035095448,
|
|
"calibration/buffer_entropy_10bins": 0.7459719739155918,
|
|
"calibration/buffer_entropy_50bins": 0.7661675140410678,
|
|
"calibration/confidence_entropy": 0.4776737674852377,
|
|
"calibration/coverage@0%": 0.012736259244119067,
|
|
"calibration/coverage@1%": 0.012736259244119067,
|
|
"calibration/coverage@10%": 0.02381805343936973,
|
|
"calibration/coverage@15%": 0.13136398524975118,
|
|
"calibration/coverage@20%": 0.2600392937120426,
|
|
"calibration/coverage@25%": 0.6212789086899664,
|
|
"calibration/coverage@30%": 0.9968337730870713,
|
|
"calibration/coverage@5%": 0.012736259244119067,
|
|
"calibration/ece": 0.19628602513678026,
|
|
"calibration/mean_confidence": 0.6154520747384761,
|
|
"calibration/prompt_uniqueness": 0.8831628620958897,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.023611111111111138,
|
|
"completions/max_length": 3450.0,
|
|
"completions/max_terminated_length": 3450.0,
|
|
"completions/mean_length": 588.0378540039062,
|
|
"completions/mean_terminated_length": 602.3594970703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 152.4,
|
|
"epoch": 0.1079986500168748,
|
|
"grad_norm": 0.00051538908155635,
|
|
"learning_rate": 4.909638554216868e-06,
|
|
"loss": -0.0195,
|
|
"num_tokens": 81113102.0,
|
|
"reward": 0.9507439851760864,
|
|
"reward_std": 0.1727825313806534,
|
|
"rewards/accuracy_reward": 0.6421006917953491,
|
|
"rewards/brier_reward": 0.7137632369995117,
|
|
"rewards/confidence_uniqueness_reward": 0.9256102681159973,
|
|
"rewards/format_reward": 0.97578125,
|
|
"rewards/frontier_aurc_reward": -0.002600804064422846,
|
|
"rewards/frontier_coverage_0": -0.033070035930722955,
|
|
"rewards/frontier_coverage_1": -0.033070035930722955,
|
|
"rewards/frontier_coverage_10": -0.033070035930722955,
|
|
"rewards/frontier_coverage_15": -0.033070035930722955,
|
|
"rewards/frontier_coverage_20": -0.033070035930722955,
|
|
"rewards/frontier_coverage_25": -0.033070035930722955,
|
|
"rewards/frontier_coverage_5": -0.033070035930722955,
|
|
"rewards/frontier_ece_reward": 0.023521846160292625,
|
|
"rewards/frontier_entropy_batch_reward": -0.2156035676598549,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19353841245174408,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21631944444444443,
|
|
"signal/accuracy_reward/group_std_mean": 0.2572809010744095,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2694444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09676920622587204,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09676920622587204,
|
|
"signal/advantage_abs_mean": 0.12924040853977203,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12924040853977203,
|
|
"signal/advantage_pre_scale_std": 0.1953139752149582,
|
|
"signal/advantage_std": 0.1953139752149582,
|
|
"signal/brier_reward/centered_abs_mean": 0.23464938700199128,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8972222222222221,
|
|
"signal/brier_reward/group_std_mean": 0.2843640446662903,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02346493937075138,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02346493937075138,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05085535049438476,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7968749999999999,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07848654761910438,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005085535254329443,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005085535254329443,
|
|
"signal/format_reward/centered_abs_mean": 0.03865559995174408,
|
|
"signal/format_reward/group_bin_occupancy": 0.1545138888888889,
|
|
"signal/format_reward/group_std_mean": 0.06427749693393707,
|
|
"signal/format_reward/group_zero_std_frac": 0.7638888835906983,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01932779997587204,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01932779997587204,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002126035187393427,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7104166666666668,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032929918263107536,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6575440278975294e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6575440278975294e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.23452837765216827,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8694444444444445,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.31311619877815244,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002931604813784361,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002931604813784361,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.23452837765216827,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8694444444444445,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.31311619877815244,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002931604813784361,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002931604813784361,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23452837765216827,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8694444444444445,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.31311619877815244,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002931604813784361,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002931604813784361,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.23452837765216827,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8694444444444445,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.31311619877815244,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002931604813784361,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002931604813784361,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.23452837765216827,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8694444444444445,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.31311619877815244,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002931604813784361,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002931604813784361,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.23452837765216827,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8694444444444445,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.31311619877815244,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002931604813784361,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002931604813784361,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.23452837765216827,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8694444444444445,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.31311619877815244,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002931604813784361,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002931604813784361,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.08764429241418839,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.809375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.11243927627801895,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00876442939043045,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00876442939043045,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3069328278303146,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8336805555555555,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3807151556015015,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030693282932043077,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030693282932043077,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.40581392002668315,
|
|
"calibration/batch_distribution_entropy": 0.9864032887347619,
|
|
"calibration/batch_entropy_100bins": 0.9648712886165287,
|
|
"calibration/batch_entropy_10bins": 0.9864032887347619,
|
|
"calibration/batch_entropy_50bins": 0.9766999165423931,
|
|
"calibration/batch_uniqueness": 0.953430626200465,
|
|
"calibration/buffer_distribution_entropy": 0.7885463322323418,
|
|
"calibration/buffer_entropy_100bins": 0.7637818833081469,
|
|
"calibration/buffer_entropy_10bins": 0.7885463322323418,
|
|
"calibration/buffer_entropy_50bins": 0.8096014118372388,
|
|
"calibration/confidence_entropy": 0.48178658393199997,
|
|
"calibration/coverage@0%": 0.00478037274518404,
|
|
"calibration/coverage@1%": 0.00478037274518404,
|
|
"calibration/coverage@10%": 0.00478037274518404,
|
|
"calibration/coverage@15%": 0.01598037274518404,
|
|
"calibration/coverage@20%": 0.018613443611325775,
|
|
"calibration/coverage@25%": 0.06659603789230381,
|
|
"calibration/coverage@30%": 0.11990588262883446,
|
|
"calibration/coverage@5%": 0.00478037274518404,
|
|
"calibration/ece": 0.2005017801122319,
|
|
"calibration/mean_confidence": 0.5256280232531849,
|
|
"calibration/prompt_uniqueness": 0.8879285110994667,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017274305555555536,
|
|
"completions/max_length": 3381.8,
|
|
"completions/max_terminated_length": 3381.8,
|
|
"completions/mean_length": 594.59775390625,
|
|
"completions/mean_terminated_length": 605.0348510742188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 157.6,
|
|
"epoch": 0.11999850001874976,
|
|
"grad_norm": 0.0005370384315028787,
|
|
"learning_rate": 4.759036144578314e-06,
|
|
"loss": -0.0163,
|
|
"num_tokens": 91060468.0,
|
|
"reward": 0.9574363589286804,
|
|
"reward_std": 0.16030279099941253,
|
|
"rewards/accuracy_reward": 0.6344617962837219,
|
|
"rewards/brier_reward": 0.7066903710365295,
|
|
"rewards/confidence_uniqueness_reward": 0.9360496401786804,
|
|
"rewards/format_reward": 0.9825520992279053,
|
|
"rewards/frontier_aurc_reward": -0.0024419894441962244,
|
|
"rewards/frontier_coverage_0": -0.03829344231635332,
|
|
"rewards/frontier_coverage_1": -0.03829344231635332,
|
|
"rewards/frontier_coverage_10": -0.03829344231635332,
|
|
"rewards/frontier_coverage_15": -0.03829344231635332,
|
|
"rewards/frontier_coverage_20": -0.03829344231635332,
|
|
"rewards/frontier_coverage_25": -0.03829344231635332,
|
|
"rewards/frontier_coverage_5": -0.03829344231635332,
|
|
"rewards/frontier_ece_reward": 0.015390362963080407,
|
|
"rewards/frontier_entropy_batch_reward": -0.1350240170955658,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18443467915058137,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21006944444444442,
|
|
"signal/accuracy_reward/group_std_mean": 0.24160374104976653,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3194444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09221733957529069,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09221733957529069,
|
|
"signal/advantage_abs_mean": 0.11947052925825119,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11947052925825119,
|
|
"signal/advantage_pre_scale_std": 0.18084822595119476,
|
|
"signal/advantage_std": 0.18084822595119476,
|
|
"signal/brier_reward/centered_abs_mean": 0.24059803187847137,
|
|
"signal/brier_reward/group_bin_occupancy": 0.9128472222222221,
|
|
"signal/brier_reward/group_std_mean": 0.2905759453773499,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024059804528951644,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.024059804528951644,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03946094214916229,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7940972222222223,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06758813932538033,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003946094121783972,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003946094121783972,
|
|
"signal/format_reward/centered_abs_mean": 0.02996419295668602,
|
|
"signal/format_reward/group_bin_occupancy": 0.15381944444444443,
|
|
"signal/format_reward/group_std_mean": 0.05656718313694,
|
|
"signal/format_reward/group_zero_std_frac": 0.7694444537162781,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01498209647834301,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01498209647834301,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017862386535853147,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7190972222222223,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0027869833167642353,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2327983970171772e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2327983970171772e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.26630950570106504,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8913194444444444,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.34513433575630187,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0033288690727204086,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0033288690727204086,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.26630950570106504,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8913194444444444,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.34513433575630187,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033288690727204086,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033288690727204086,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.26630950570106504,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8913194444444444,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.34513433575630187,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033288690727204086,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033288690727204086,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.26630950570106504,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8913194444444444,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.34513433575630187,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033288690727204086,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033288690727204086,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.26630950570106504,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8913194444444444,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.34513433575630187,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033288690727204086,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033288690727204086,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.26630950570106504,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8913194444444444,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.34513433575630187,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033288690727204086,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033288690727204086,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.26630950570106504,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8913194444444444,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.34513433575630187,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033288690727204086,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033288690727204086,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.08141317814588547,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8072916666666666,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.10631832182407379,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008141317777335644,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008141317777335644,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22239840626716614,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7815972222222223,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2998348593711853,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022239841893315314,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022239841893315314,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.11999850001874976,
|
|
"eval_calibration/aurc": 0.2109596362293293,
|
|
"eval_calibration/batch_distribution_entropy": 0.9226056413086375,
|
|
"eval_calibration/batch_entropy_100bins": 0.6951142630838091,
|
|
"eval_calibration/batch_entropy_10bins": 0.9226056413086375,
|
|
"eval_calibration/batch_entropy_50bins": 0.7795849121323298,
|
|
"eval_calibration/batch_uniqueness": 0.8931600654699965,
|
|
"eval_calibration/buffer_distribution_entropy": 0.815512391941836,
|
|
"eval_calibration/buffer_entropy_100bins": 0.7914134740073022,
|
|
"eval_calibration/buffer_entropy_10bins": 0.815512391941836,
|
|
"eval_calibration/buffer_entropy_50bins": 0.8336375838667806,
|
|
"eval_calibration/confidence_entropy": 0.49500069308362776,
|
|
"eval_calibration/coverage@0%": 0.13205645161290322,
|
|
"eval_calibration/coverage@1%": 0.13205645161290322,
|
|
"eval_calibration/coverage@10%": 0.26915322580645157,
|
|
"eval_calibration/coverage@15%": 0.3790322580645162,
|
|
"eval_calibration/coverage@20%": 0.6399529569892474,
|
|
"eval_calibration/coverage@25%": 0.7288306451612904,
|
|
"eval_calibration/coverage@30%": 0.8385416666666666,
|
|
"eval_calibration/coverage@5%": 0.13205645161290322,
|
|
"eval_calibration/ece": 0.25606512017994004,
|
|
"eval_calibration/mean_confidence": 0.5449926770716114,
|
|
"eval_calibration/prompt_uniqueness": 0.8931600654699965,
|
|
"eval_completions/clipped_ratio": 0.018229166666666668,
|
|
"eval_completions/max_length": 2121.5,
|
|
"eval_completions/max_terminated_length": 2121.5,
|
|
"eval_completions/mean_length": 582.2923787434896,
|
|
"eval_completions/mean_terminated_length": 593.1960754394531,
|
|
"eval_completions/min_length": 46.5,
|
|
"eval_completions/min_terminated_length": 204.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 91060468.0,
|
|
"eval_reward": 0.8969475229581197,
|
|
"eval_reward_std": 0.25840714077154797,
|
|
"eval_rewards/accuracy_reward": 0.6249999900658926,
|
|
"eval_rewards/brier_reward": 0.7243680159250895,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8756765027840933,
|
|
"eval_rewards/format_reward": 0.9774305621782938,
|
|
"eval_rewards/frontier_aurc_reward": -0.0024169180736256144,
|
|
"eval_rewards/frontier_coverage_0": -0.014742235808322826,
|
|
"eval_rewards/frontier_coverage_1": -0.014742235808322826,
|
|
"eval_rewards/frontier_coverage_10": -0.014742235808322826,
|
|
"eval_rewards/frontier_coverage_15": -0.014742235808322826,
|
|
"eval_rewards/frontier_coverage_20": -0.014742235808322826,
|
|
"eval_rewards/frontier_coverage_25": -0.014742235808322826,
|
|
"eval_rewards/frontier_coverage_5": -0.014742235808322826,
|
|
"eval_rewards/frontier_ece_reward": 0.015066012740135193,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.6445865134398142,
|
|
"eval_runtime": 205.5472,
|
|
"eval_samples_per_second": 4.865,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4510633647441864,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4817399134238561,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2255316823720932,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2255316823720932,
|
|
"eval_signal/advantage_abs_mean": 0.21911720434824625,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21911720434824625,
|
|
"eval_signal/advantage_pre_scale_std": 0.2567944601178169,
|
|
"eval_signal/advantage_std": 0.2567944601178169,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.23708807677030563,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.9201388888888888,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2897856483856837,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023708807304501534,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.023708807304501534,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06476977219184239,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3819444444444444,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1230657051006953,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006476977374404669,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006476977374404669,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.043077257461845875,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.19444444444444445,
|
|
"eval_signal/format_reward/group_std_mean": 0.10973560561736424,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.4444444527228673,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.021538628730922937,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.021538628730922937,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0020677158997083702,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7222222222222222,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0035327961280321083,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.584644911015251e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.584644911015251e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2857043494780858,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9479166666666666,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.39817163348197937,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0035713044150422015,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035713044150422015,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.2857043494780858,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9479166666666666,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.39817163348197937,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035713044150422015,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035713044150422015,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.2857043494780858,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9479166666666666,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.39817163348197937,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035713044150422015,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035713044150422015,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.2857043494780858,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9479166666666666,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.39817163348197937,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035713044150422015,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035713044150422015,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2857043494780858,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9479166666666666,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.39817163348197937,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035713044150422015,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035713044150422015,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2857043494780858,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9479166666666666,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.39817163348197937,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035713044150422015,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035713044150422015,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2857043494780858,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9479166666666666,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.39817163348197937,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035713044150422015,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035713044150422015,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.07295310931901137,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8715277777777777,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.0984811931848526,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007295310885335009,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007295310885335009,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3177054176727931,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3194444444444444,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.33345575133959454,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031770541022221245,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031770541022221245,
|
|
"eval_steps_per_second": 0.029,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2694222235390976,
|
|
"calibration/batch_distribution_entropy": 0.9743103185657492,
|
|
"calibration/batch_entropy_100bins": 0.9569912029449112,
|
|
"calibration/batch_entropy_10bins": 0.9743103185657492,
|
|
"calibration/batch_entropy_50bins": 0.9695456736926709,
|
|
"calibration/batch_uniqueness": 0.9509898009239965,
|
|
"calibration/buffer_distribution_entropy": 0.8302606383123452,
|
|
"calibration/buffer_entropy_100bins": 0.807123888242816,
|
|
"calibration/buffer_entropy_10bins": 0.8302606383123452,
|
|
"calibration/buffer_entropy_50bins": 0.8468961491187944,
|
|
"calibration/confidence_entropy": 0.5156970864044457,
|
|
"calibration/coverage@0%": 0.0036787068283131276,
|
|
"calibration/coverage@1%": 0.0036787068283131276,
|
|
"calibration/coverage@10%": 0.023101279006790817,
|
|
"calibration/coverage@15%": 0.06956951427694946,
|
|
"calibration/coverage@20%": 0.3806686646144719,
|
|
"calibration/coverage@25%": 0.5535145450606225,
|
|
"calibration/coverage@30%": 0.7110186806437707,
|
|
"calibration/coverage@5%": 0.0036787068283131276,
|
|
"calibration/ece": 0.1878179475943192,
|
|
"calibration/mean_confidence": 0.5640014221751124,
|
|
"calibration/prompt_uniqueness": 0.8892825716455885,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.018836805555555558,
|
|
"completions/max_length": 3432.4,
|
|
"completions/max_terminated_length": 3432.4,
|
|
"completions/mean_length": 611.9341186523437,
|
|
"completions/mean_terminated_length": 623.7887573242188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 145.4,
|
|
"epoch": 0.13199835002062474,
|
|
"grad_norm": 0.00044835961307398975,
|
|
"learning_rate": 4.60843373493976e-06,
|
|
"loss": -0.0164,
|
|
"num_tokens": 101190525.0,
|
|
"reward": 0.9691686749458313,
|
|
"reward_std": 0.15596783459186553,
|
|
"rewards/accuracy_reward": 0.6544270634651184,
|
|
"rewards/brier_reward": 0.728352153301239,
|
|
"rewards/confidence_uniqueness_reward": 0.9341305613517761,
|
|
"rewards/format_reward": 0.9808159828186035,
|
|
"rewards/frontier_aurc_reward": -0.0021917944541200995,
|
|
"rewards/frontier_coverage_0": -0.03554604309611022,
|
|
"rewards/frontier_coverage_1": -0.03554604309611022,
|
|
"rewards/frontier_coverage_10": -0.03554604309611022,
|
|
"rewards/frontier_coverage_15": -0.03554604309611022,
|
|
"rewards/frontier_coverage_20": -0.03554604309611022,
|
|
"rewards/frontier_coverage_25": -0.03554604309611022,
|
|
"rewards/frontier_coverage_5": -0.03554604309611022,
|
|
"rewards/frontier_ece_reward": 0.015853497385978698,
|
|
"rewards/frontier_entropy_batch_reward": -0.13148798942565917,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1805935323238373,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20694444444444443,
|
|
"signal/accuracy_reward/group_std_mean": 0.23551449477672576,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3444444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09029676616191865,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09029676616191865,
|
|
"signal/advantage_abs_mean": 0.1168292984366417,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1168292984366417,
|
|
"signal/advantage_pre_scale_std": 0.18130592703819276,
|
|
"signal/advantage_std": 0.18130592703819276,
|
|
"signal/brier_reward/centered_abs_mean": 0.2102464973926544,
|
|
"signal/brier_reward/group_bin_occupancy": 0.9072916666666668,
|
|
"signal/brier_reward/group_std_mean": 0.2576879024505615,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02102465070784092,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02102465070784092,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.040306436270475386,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7975694444444443,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06543851867318154,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004030643822625279,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004030643822625279,
|
|
"signal/format_reward/centered_abs_mean": 0.03095160648226738,
|
|
"signal/format_reward/group_bin_occupancy": 0.15138888888888888,
|
|
"signal/format_reward/group_std_mean": 0.05426007434725762,
|
|
"signal/format_reward/group_zero_std_frac": 0.7888888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01547580324113369,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01547580324113369,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001665117172524333,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7086805555555556,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0026435004081577064,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.081396560242865e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.081396560242865e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2305227130651474,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8920138888888889,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.30123440027236936,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028815338853746654,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028815338853746654,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2305227130651474,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8920138888888889,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.30123440027236936,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028815338853746654,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028815338853746654,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2305227130651474,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8920138888888889,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.30123440027236936,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028815338853746654,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028815338853746654,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2305227130651474,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8920138888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.30123440027236936,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028815338853746654,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028815338853746654,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2305227130651474,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8920138888888889,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.30123440027236936,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028815338853746654,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028815338853746654,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2305227130651474,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8920138888888889,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.30123440027236936,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028815338853746654,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028815338853746654,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2305227130651474,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8920138888888889,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.30123440027236936,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028815338853746654,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028815338853746654,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06926655918359756,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7784722222222222,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09231365174055099,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006926656048744917,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006926656048744917,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21102777123451233,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8166666666666667,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.283634626865387,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021102776750922203,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021102776750922203,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3307035914636969,
|
|
"calibration/batch_distribution_entropy": 0.9677243577600381,
|
|
"calibration/batch_entropy_100bins": 0.9571977793903559,
|
|
"calibration/batch_entropy_10bins": 0.9677243577600381,
|
|
"calibration/batch_entropy_50bins": 0.9681875033490638,
|
|
"calibration/batch_uniqueness": 0.9505193455375718,
|
|
"calibration/buffer_distribution_entropy": 0.8528815324002004,
|
|
"calibration/buffer_entropy_100bins": 0.8344222403886594,
|
|
"calibration/buffer_entropy_10bins": 0.8528815324002004,
|
|
"calibration/buffer_entropy_50bins": 0.8685746685852462,
|
|
"calibration/confidence_entropy": 0.5152009551346997,
|
|
"calibration/coverage@0%": 0.01153219045162198,
|
|
"calibration/coverage@1%": 0.01153219045162198,
|
|
"calibration/coverage@10%": 0.08744842081811413,
|
|
"calibration/coverage@15%": 0.22566831610607224,
|
|
"calibration/coverage@20%": 0.3026316668914125,
|
|
"calibration/coverage@25%": 0.37020823239169576,
|
|
"calibration/coverage@30%": 0.43324681560185196,
|
|
"calibration/coverage@5%": 0.032998159038009414,
|
|
"calibration/ece": 0.19605623757236762,
|
|
"calibration/mean_confidence": 0.5866015536543909,
|
|
"calibration/prompt_uniqueness": 0.8868937097156987,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015798611111111117,
|
|
"completions/max_length": 3490.0,
|
|
"completions/max_terminated_length": 3490.0,
|
|
"completions/mean_length": 626.966845703125,
|
|
"completions/mean_terminated_length": 637.0339233398438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 158.4,
|
|
"epoch": 0.14399820002249972,
|
|
"grad_norm": 0.00042762517114169896,
|
|
"learning_rate": 4.457831325301205e-06,
|
|
"loss": -0.0143,
|
|
"num_tokens": 111509759.0,
|
|
"reward": 0.9568191528320312,
|
|
"reward_std": 0.15329523682594298,
|
|
"rewards/accuracy_reward": 0.6266492962837219,
|
|
"rewards/brier_reward": 0.7438425660133362,
|
|
"rewards/confidence_uniqueness_reward": 0.9345135450363159,
|
|
"rewards/format_reward": 0.9841145753860474,
|
|
"rewards/frontier_aurc_reward": -0.0022769244387745857,
|
|
"rewards/frontier_coverage_0": -0.002498930087313056,
|
|
"rewards/frontier_coverage_1": -0.002498930087313056,
|
|
"rewards/frontier_coverage_10": -0.002498930087313056,
|
|
"rewards/frontier_coverage_15": -0.002498930087313056,
|
|
"rewards/frontier_coverage_20": -0.002498930087313056,
|
|
"rewards/frontier_coverage_25": -0.002498930087313056,
|
|
"rewards/frontier_coverage_5": -0.002498930087313056,
|
|
"rewards/frontier_ece_reward": 0.02260393425822258,
|
|
"rewards/frontier_entropy_batch_reward": -0.18411682844161986,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18255750834941864,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21145833333333336,
|
|
"signal/accuracy_reward/group_std_mean": 0.24254016876220702,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3083333373069763,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09127875417470932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09127875417470932,
|
|
"signal/advantage_abs_mean": 0.11400031745433807,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11400031745433807,
|
|
"signal/advantage_pre_scale_std": 0.17615911066532136,
|
|
"signal/advantage_std": 0.17615911066532136,
|
|
"signal/brier_reward/centered_abs_mean": 0.19489111006259918,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8920138888888889,
|
|
"signal/brier_reward/group_std_mean": 0.24229688942432404,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019489111378788948,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019489111378788948,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037897521257400514,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.062398982048034665,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003789752395823598,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003789752395823598,
|
|
"signal/format_reward/centered_abs_mean": 0.02632921040058136,
|
|
"signal/format_reward/group_bin_occupancy": 0.1496527777777778,
|
|
"signal/format_reward/group_std_mean": 0.04854804500937462,
|
|
"signal/format_reward/group_zero_std_frac": 0.8027777791023254,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01316460520029068,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01316460520029068,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019377078860998154,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7121527777777779,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0030613655224442484,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.422134857624769e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.422134857624769e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2110768437385559,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8638888888888889,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.28098778128623964,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002638460695743561,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002638460695743561,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2110768437385559,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8638888888888889,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.28098778128623964,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002638460695743561,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002638460695743561,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2110768437385559,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8638888888888889,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.28098778128623964,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002638460695743561,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002638460695743561,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2110768437385559,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8638888888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.28098778128623964,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002638460695743561,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002638460695743561,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2110768437385559,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8638888888888889,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.28098778128623964,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002638460695743561,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002638460695743561,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2110768437385559,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8638888888888889,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.28098778128623964,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002638460695743561,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002638460695743561,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2110768437385559,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8638888888888889,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.28098778128623964,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002638460695743561,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002638460695743561,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06807545423507691,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7677083333333334,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08898028582334519,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006807545572519303,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006807545572519303,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2613932341337204,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8180555555555558,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33782604336738586,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02613932266831398,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02613932266831398,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23300829060517123,
|
|
"calibration/batch_distribution_entropy": 0.9831016338957796,
|
|
"calibration/batch_entropy_100bins": 0.9632753017365372,
|
|
"calibration/batch_entropy_10bins": 0.9831016338957796,
|
|
"calibration/batch_entropy_50bins": 0.9759229135714376,
|
|
"calibration/batch_uniqueness": 0.9525056216175093,
|
|
"calibration/buffer_distribution_entropy": 0.8715169058505172,
|
|
"calibration/buffer_entropy_100bins": 0.8564367522033102,
|
|
"calibration/buffer_entropy_10bins": 0.8715169058505172,
|
|
"calibration/buffer_entropy_50bins": 0.8863222147297156,
|
|
"calibration/confidence_entropy": 0.48784240565714115,
|
|
"calibration/coverage@0%": 0.02786145806735325,
|
|
"calibration/coverage@1%": 0.02786145806735325,
|
|
"calibration/coverage@10%": 0.1591952861236417,
|
|
"calibration/coverage@15%": 0.4360120187533691,
|
|
"calibration/coverage@20%": 0.5692992045422305,
|
|
"calibration/coverage@25%": 0.6198779772037336,
|
|
"calibration/coverage@30%": 0.7344086021505376,
|
|
"calibration/coverage@5%": 0.04193082921950804,
|
|
"calibration/ece": 0.19008310139291312,
|
|
"calibration/mean_confidence": 0.5527640346426371,
|
|
"calibration/prompt_uniqueness": 0.87965394467687,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010416666666666675,
|
|
"completions/max_length": 3113.2,
|
|
"completions/max_terminated_length": 3113.2,
|
|
"completions/mean_length": 610.5935913085938,
|
|
"completions/mean_terminated_length": 616.9865234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 147.4,
|
|
"epoch": 0.1559980500243747,
|
|
"grad_norm": 0.0005515015218406916,
|
|
"learning_rate": 4.307228915662651e-06,
|
|
"loss": -0.0085,
|
|
"num_tokens": 121637845.0,
|
|
"reward": 0.9779425144195557,
|
|
"reward_std": 0.13538099378347396,
|
|
"rewards/accuracy_reward": 0.6507812380790711,
|
|
"rewards/brier_reward": 0.7501181960105896,
|
|
"rewards/confidence_uniqueness_reward": 0.9430691480636597,
|
|
"rewards/format_reward": 0.9895833373069763,
|
|
"rewards/frontier_aurc_reward": -0.0019629735965281726,
|
|
"rewards/frontier_coverage_0": -0.008767739811446518,
|
|
"rewards/frontier_coverage_1": -0.008767739811446518,
|
|
"rewards/frontier_coverage_10": -0.008767739811446518,
|
|
"rewards/frontier_coverage_15": -0.008767739811446518,
|
|
"rewards/frontier_coverage_20": -0.008767739811446518,
|
|
"rewards/frontier_coverage_25": -0.008767739811446518,
|
|
"rewards/frontier_coverage_5": -0.008767739811446518,
|
|
"rewards/frontier_ece_reward": 0.020754358358681203,
|
|
"rewards/frontier_entropy_batch_reward": -0.12842243015766144,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.167333984375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20243055555555553,
|
|
"signal/accuracy_reward/group_std_mean": 0.2216338872909546,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3805555641651154,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0836669921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0836669921875,
|
|
"signal/advantage_abs_mean": 0.09993359893560409,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09993359893560409,
|
|
"signal/advantage_pre_scale_std": 0.15722057819366456,
|
|
"signal/advantage_std": 0.15722057819366456,
|
|
"signal/brier_reward/centered_abs_mean": 0.19362751245498658,
|
|
"signal/brier_reward/group_bin_occupancy": 0.875,
|
|
"signal/brier_reward/group_std_mean": 0.24217391312122344,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019362751767039298,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019362751767039298,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028328100219368935,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8430555555555556,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04791910648345947,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028328101616352797,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028328101616352797,
|
|
"signal/format_reward/centered_abs_mean": 0.01808810755610466,
|
|
"signal/format_reward/group_bin_occupancy": 0.1440972222222222,
|
|
"signal/format_reward/group_std_mean": 0.03565124273300171,
|
|
"signal/format_reward/group_zero_std_frac": 0.8472222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00904405377805233,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00904405377805233,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016498573124408722,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6878472222222223,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0026893588714301587,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0623216914827936e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0623216914827936e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2315441280603409,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8649305555555555,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3030831813812256,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002894301526248455,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002894301526248455,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2315441280603409,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8649305555555555,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3030831813812256,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002894301526248455,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002894301526248455,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2315441280603409,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8649305555555555,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3030831813812256,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002894301526248455,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002894301526248455,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2315441280603409,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8649305555555555,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3030831813812256,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002894301526248455,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002894301526248455,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2315441280603409,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8649305555555555,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3030831813812256,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002894301526248455,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002894301526248455,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2315441280603409,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8649305555555555,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3030831813812256,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002894301526248455,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002894301526248455,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2315441280603409,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8649305555555555,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3030831813812256,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002894301526248455,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002894301526248455,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06531385183334351,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7784722222222221,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08503876328468322,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006531385611742735,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006531385611742735,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.20490280091762542,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7586805555555556,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.27364385724067686,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02049028016626835,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02049028016626835,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29206435720557744,
|
|
"calibration/batch_distribution_entropy": 0.967741577432348,
|
|
"calibration/batch_entropy_100bins": 0.9547615908625235,
|
|
"calibration/batch_entropy_10bins": 0.967741577432348,
|
|
"calibration/batch_entropy_50bins": 0.9660211465920273,
|
|
"calibration/batch_uniqueness": 0.9491694668612134,
|
|
"calibration/buffer_distribution_entropy": 0.8853528168662106,
|
|
"calibration/buffer_entropy_100bins": 0.8734316453290203,
|
|
"calibration/buffer_entropy_10bins": 0.8853528168662106,
|
|
"calibration/buffer_entropy_50bins": 0.8995535410048392,
|
|
"calibration/confidence_entropy": 0.47921780511639545,
|
|
"calibration/coverage@0%": 0.008454719126847999,
|
|
"calibration/coverage@1%": 0.008454719126847999,
|
|
"calibration/coverage@10%": 0.10795939337610916,
|
|
"calibration/coverage@15%": 0.16163093522390154,
|
|
"calibration/coverage@20%": 0.20303708239739757,
|
|
"calibration/coverage@25%": 0.27123720908418275,
|
|
"calibration/coverage@30%": 0.46396145357735036,
|
|
"calibration/coverage@5%": 0.008454719126847999,
|
|
"calibration/ece": 0.1539259917359437,
|
|
"calibration/mean_confidence": 0.590760119025805,
|
|
"calibration/prompt_uniqueness": 0.8780244614506524,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01163194444444442,
|
|
"completions/max_length": 3017.8,
|
|
"completions/max_terminated_length": 3017.8,
|
|
"completions/mean_length": 598.3328247070312,
|
|
"completions/mean_terminated_length": 605.3543090820312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 137.2,
|
|
"epoch": 0.16799790002624967,
|
|
"grad_norm": 0.00044609271571971476,
|
|
"learning_rate": 4.156626506024097e-06,
|
|
"loss": -0.01,
|
|
"num_tokens": 131608783.0,
|
|
"reward": 0.964812970161438,
|
|
"reward_std": 0.13441329300403596,
|
|
"rewards/accuracy_reward": 0.6355902791023255,
|
|
"rewards/brier_reward": 0.7468560457229614,
|
|
"rewards/confidence_uniqueness_reward": 0.9387478590011596,
|
|
"rewards/format_reward": 0.9881944537162781,
|
|
"rewards/frontier_aurc_reward": -0.002175836288370192,
|
|
"rewards/frontier_coverage_0": 0.000884566456079483,
|
|
"rewards/frontier_coverage_1": 0.000884566456079483,
|
|
"rewards/frontier_coverage_10": 0.000884566456079483,
|
|
"rewards/frontier_coverage_15": 0.000884566456079483,
|
|
"rewards/frontier_coverage_20": 0.000884566456079483,
|
|
"rewards/frontier_coverage_25": 0.000884566456079483,
|
|
"rewards/frontier_coverage_5": 0.000884566456079483,
|
|
"rewards/frontier_ece_reward": 0.02232353687286377,
|
|
"rewards/frontier_entropy_batch_reward": -0.17922367453575133,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16927083134651183,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19895833333333335,
|
|
"signal/accuracy_reward/group_std_mean": 0.2159910023212433,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40833333134651184,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08463541567325591,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08463541567325591,
|
|
"signal/advantage_abs_mean": 0.10277295261621475,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10277295261621475,
|
|
"signal/advantage_pre_scale_std": 0.16154861450195312,
|
|
"signal/advantage_std": 0.16154861450195312,
|
|
"signal/brier_reward/centered_abs_mean": 0.18858011364936828,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8704861111111111,
|
|
"signal/brier_reward/group_std_mean": 0.23463993072509765,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018858011066913604,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018858011066913604,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03243453465402126,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8458333333333334,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05219922661781311,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003243453614413738,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003243453614413738,
|
|
"signal/format_reward/centered_abs_mean": 0.01965060755610466,
|
|
"signal/format_reward/group_bin_occupancy": 0.14340277777777777,
|
|
"signal/format_reward/group_std_mean": 0.03690010011196136,
|
|
"signal/format_reward/group_zero_std_frac": 0.8527777791023254,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00982530377805233,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00982530377805233,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001957321958616376,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6996527777777778,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003007865697145462,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.446652579237707e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.446652579237707e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2180919259786606,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.846875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2869709312915802,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027261491399258376,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027261491399258376,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2180919259786606,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.846875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2869709312915802,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027261491399258376,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027261491399258376,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2180919259786606,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.846875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2869709312915802,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027261491399258376,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027261491399258376,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2180919259786606,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.846875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2869709312915802,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027261491399258376,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027261491399258376,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2180919259786606,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.846875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2869709312915802,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027261491399258376,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027261491399258376,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2180919259786606,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.846875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2869709312915802,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027261491399258376,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027261491399258376,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2180919259786606,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.846875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2869709312915802,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027261491399258376,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027261491399258376,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06389048919081688,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7607638888888889,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08154775202274323,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006389048788696528,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006389048788696528,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25127990543842316,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.775,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3219525694847107,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025127990543842314,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025127990543842314,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24865885558565473,
|
|
"calibration/batch_distribution_entropy": 0.96489969481927,
|
|
"calibration/batch_entropy_100bins": 0.9516288732160346,
|
|
"calibration/batch_entropy_10bins": 0.96489969481927,
|
|
"calibration/batch_entropy_50bins": 0.9641975783352297,
|
|
"calibration/batch_uniqueness": 0.948999975102953,
|
|
"calibration/buffer_distribution_entropy": 0.8949759127137261,
|
|
"calibration/buffer_entropy_100bins": 0.8868731831868493,
|
|
"calibration/buffer_entropy_10bins": 0.8949759127137261,
|
|
"calibration/buffer_entropy_50bins": 0.9096876693511307,
|
|
"calibration/confidence_entropy": 0.5163309735163633,
|
|
"calibration/coverage@0%": 0.00838168656056587,
|
|
"calibration/coverage@1%": 0.00838168656056587,
|
|
"calibration/coverage@10%": 0.1720974248452697,
|
|
"calibration/coverage@15%": 0.24661665561450047,
|
|
"calibration/coverage@20%": 0.3965249226348364,
|
|
"calibration/coverage@25%": 0.6515518841034948,
|
|
"calibration/coverage@30%": 0.7176282051282051,
|
|
"calibration/coverage@5%": 0.016715019893899206,
|
|
"calibration/ece": 0.18913358089517085,
|
|
"calibration/mean_confidence": 0.5699598192734223,
|
|
"calibration/prompt_uniqueness": 0.8794210313331021,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.005902777777777768,
|
|
"completions/max_length": 2728.0,
|
|
"completions/max_terminated_length": 2728.0,
|
|
"completions/mean_length": 617.7827270507812,
|
|
"completions/mean_terminated_length": 621.47099609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 182.2,
|
|
"epoch": 0.17999775002812465,
|
|
"grad_norm": 0.0004136954667046666,
|
|
"learning_rate": 4.006024096385543e-06,
|
|
"loss": -0.0037,
|
|
"num_tokens": 141790536.0,
|
|
"reward": 0.9999241590499878,
|
|
"reward_std": 0.12385457307100296,
|
|
"rewards/accuracy_reward": 0.6986111044883728,
|
|
"rewards/brier_reward": 0.764533269405365,
|
|
"rewards/confidence_uniqueness_reward": 0.9451632022857666,
|
|
"rewards/format_reward": 0.9940972208976746,
|
|
"rewards/frontier_aurc_reward": -0.001692651305347681,
|
|
"rewards/frontier_coverage_0": -0.03404254494234919,
|
|
"rewards/frontier_coverage_1": -0.03404254494234919,
|
|
"rewards/frontier_coverage_10": -0.03404254494234919,
|
|
"rewards/frontier_coverage_15": -0.03404254494234919,
|
|
"rewards/frontier_coverage_20": -0.03404254494234919,
|
|
"rewards/frontier_coverage_25": -0.03404254494234919,
|
|
"rewards/frontier_coverage_5": -0.03404254494234919,
|
|
"rewards/frontier_ece_reward": 0.017354899458587168,
|
|
"rewards/frontier_entropy_batch_reward": -0.1613529622554779,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15860459804534913,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.2,
|
|
"signal/accuracy_reward/group_std_mean": 0.21018220484256744,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07930229902267456,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07930229902267456,
|
|
"signal/advantage_abs_mean": 0.09171124696731567,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09171124696731567,
|
|
"signal/advantage_pre_scale_std": 0.1453747808933258,
|
|
"signal/advantage_std": 0.1453747808933258,
|
|
"signal/brier_reward/centered_abs_mean": 0.16959642767906188,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8746527777777778,
|
|
"signal/brier_reward/group_std_mean": 0.21293676793575286,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016959642991423607,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016959642991423607,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023114091902971267,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8760416666666668,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03923774063587189,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023114092415198683,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023114092415198683,
|
|
"signal/format_reward/centered_abs_mean": 0.010861545195803046,
|
|
"signal/format_reward/group_bin_occupancy": 0.1392361111111111,
|
|
"signal/format_reward/group_std_mean": 0.0244428563863039,
|
|
"signal/format_reward/group_zero_std_frac": 0.8861111283302308,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005430772597901523,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005430772597901523,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014858563197776675,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7024305555555556,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023568171076476575,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.857320366980275e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.857320366980275e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20621402859687804,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8604166666666668,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2692394435405731,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00257767541334033,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00257767541334033,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20621402859687804,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8604166666666668,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2692394435405731,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00257767541334033,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00257767541334033,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20621402859687804,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8604166666666668,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2692394435405731,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00257767541334033,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00257767541334033,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20621402859687804,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8604166666666668,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2692394435405731,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00257767541334033,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00257767541334033,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20621402859687804,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8604166666666668,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2692394435405731,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00257767541334033,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00257767541334033,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20621402859687804,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8604166666666668,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2692394435405731,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00257767541334033,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00257767541334033,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20621402859687804,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8604166666666668,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2692394435405731,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00257767541334033,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00257767541334033,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.057116496562957766,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7486111111111111,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07407085299491882,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00571164982393384,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00571164982393384,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23405362963676452,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7993055555555555,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3055886387825012,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023405364155769347,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023405364155769347,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2076444932942266,
|
|
"calibration/batch_distribution_entropy": 0.9566517910549462,
|
|
"calibration/batch_entropy_100bins": 0.9491858911049785,
|
|
"calibration/batch_entropy_10bins": 0.9566517910549462,
|
|
"calibration/batch_entropy_50bins": 0.9586329550903072,
|
|
"calibration/batch_uniqueness": 0.9478874863011697,
|
|
"calibration/buffer_distribution_entropy": 0.9042071878672668,
|
|
"calibration/buffer_entropy_100bins": 0.8985727602789755,
|
|
"calibration/buffer_entropy_10bins": 0.9042071878672668,
|
|
"calibration/buffer_entropy_50bins": 0.9185937542698432,
|
|
"calibration/confidence_entropy": 0.49120686633309446,
|
|
"calibration/coverage@0%": 0.019409886968950837,
|
|
"calibration/coverage@1%": 0.019409886968950837,
|
|
"calibration/coverage@10%": 0.17595065074395838,
|
|
"calibration/coverage@15%": 0.5099854214864996,
|
|
"calibration/coverage@20%": 0.5846423011494986,
|
|
"calibration/coverage@25%": 0.6973665942443268,
|
|
"calibration/coverage@30%": 0.7510149662143296,
|
|
"calibration/coverage@5%": 0.11041733099983801,
|
|
"calibration/ece": 0.18258882314927863,
|
|
"calibration/mean_confidence": 0.6132432048852545,
|
|
"calibration/prompt_uniqueness": 0.8730205259179573,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010243055555555557,
|
|
"completions/max_length": 3484.6,
|
|
"completions/max_terminated_length": 3484.6,
|
|
"completions/mean_length": 672.1552978515625,
|
|
"completions/mean_terminated_length": 679.1800170898438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 170.2,
|
|
"epoch": 0.19199760002999963,
|
|
"grad_norm": 0.0004997280775569379,
|
|
"learning_rate": 3.855421686746989e-06,
|
|
"loss": -0.0094,
|
|
"num_tokens": 152587045.0,
|
|
"reward": 0.9794714093208313,
|
|
"reward_std": 0.13951779305934905,
|
|
"rewards/accuracy_reward": 0.6626736044883728,
|
|
"rewards/brier_reward": 0.7578543424606323,
|
|
"rewards/confidence_uniqueness_reward": 0.9402774095535278,
|
|
"rewards/format_reward": 0.9893229126930236,
|
|
"rewards/frontier_aurc_reward": -0.0018948199227452277,
|
|
"rewards/frontier_coverage_0": -0.014700169442221522,
|
|
"rewards/frontier_coverage_1": -0.014700169442221522,
|
|
"rewards/frontier_coverage_10": -0.014700169442221522,
|
|
"rewards/frontier_coverage_15": -0.014700169442221522,
|
|
"rewards/frontier_coverage_20": -0.014700169442221522,
|
|
"rewards/frontier_coverage_25": -0.014700169442221522,
|
|
"rewards/frontier_coverage_5": -0.014700169442221522,
|
|
"rewards/frontier_ece_reward": 0.019117896631360053,
|
|
"rewards/frontier_entropy_batch_reward": -0.16941888332366944,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18274739384651184,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20833333333333331,
|
|
"signal/accuracy_reward/group_std_mean": 0.23848095238208772,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.33333333134651183,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09137369692325592,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09137369692325592,
|
|
"signal/advantage_abs_mean": 0.10523971766233445,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10523971766233445,
|
|
"signal/advantage_pre_scale_std": 0.16219059228897095,
|
|
"signal/advantage_std": 0.16219059228897095,
|
|
"signal/brier_reward/centered_abs_mean": 0.17205582857131957,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8708333333333332,
|
|
"signal/brier_reward/group_std_mean": 0.21611034870147705,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017205582931637764,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017205582931637764,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029724714532494544,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.861111111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.046706152707338335,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002972471574321389,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002972471574321389,
|
|
"signal/format_reward/centered_abs_mean": 0.01745334193110466,
|
|
"signal/format_reward/group_bin_occupancy": 0.14131944444444444,
|
|
"signal/format_reward/group_std_mean": 0.03190700151026249,
|
|
"signal/format_reward/group_zero_std_frac": 0.8694444417953491,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00872667096555233,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00872667096555233,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017476935172453523,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7006944444444445,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002774294326081872,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1846168237971142e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1846168237971142e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20781008899211884,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8597222222222223,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.27139957547187804,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025976261589676143,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025976261589676143,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20781008899211884,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8597222222222223,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27139957547187804,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025976261589676143,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025976261589676143,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20781008899211884,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8597222222222223,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27139957547187804,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025976261589676143,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025976261589676143,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20781008899211884,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8597222222222223,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27139957547187804,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025976261589676143,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025976261589676143,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20781008899211884,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8597222222222223,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27139957547187804,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025976261589676143,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025976261589676143,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20781008899211884,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8597222222222223,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27139957547187804,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025976261589676143,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025976261589676143,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20781008899211884,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8597222222222223,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27139957547187804,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025976261589676143,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025976261589676143,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0579615406692028,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7409722222222223,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07379811108112336,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005796154215931893,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005796154215931893,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24322098791599273,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7798611111111111,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3134605050086975,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02432209961116314,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02432209961116314,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20271091384745868,
|
|
"calibration/batch_distribution_entropy": 0.9737261800910246,
|
|
"calibration/batch_entropy_100bins": 0.9591620782628887,
|
|
"calibration/batch_entropy_10bins": 0.9737261800910246,
|
|
"calibration/batch_entropy_50bins": 0.9716196064375529,
|
|
"calibration/batch_uniqueness": 0.9514730500511689,
|
|
"calibration/buffer_distribution_entropy": 0.9112810148129586,
|
|
"calibration/buffer_entropy_100bins": 0.9083171951473498,
|
|
"calibration/buffer_entropy_10bins": 0.9112810148129586,
|
|
"calibration/buffer_entropy_50bins": 0.9257044262181973,
|
|
"calibration/confidence_entropy": 0.5057748742528816,
|
|
"calibration/coverage@0%": 0.01730324074074074,
|
|
"calibration/coverage@1%": 0.01730324074074074,
|
|
"calibration/coverage@10%": 0.2049778473780437,
|
|
"calibration/coverage@15%": 0.3992730494612039,
|
|
"calibration/coverage@20%": 0.5388349573395385,
|
|
"calibration/coverage@25%": 0.7145227360868722,
|
|
"calibration/coverage@30%": 0.8611193783068783,
|
|
"calibration/coverage@5%": 0.0400214947089947,
|
|
"calibration/ece": 0.1378626115809829,
|
|
"calibration/mean_confidence": 0.5521758505614638,
|
|
"calibration/prompt_uniqueness": 0.8702453560167897,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.005555555555555536,
|
|
"completions/max_length": 3238.0,
|
|
"completions/max_terminated_length": 3238.0,
|
|
"completions/mean_length": 672.5680541992188,
|
|
"completions/mean_terminated_length": 676.3651611328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 220.4,
|
|
"epoch": 0.2039974500318746,
|
|
"grad_norm": 0.00044680043356493115,
|
|
"learning_rate": 3.7048192771084342e-06,
|
|
"loss": -0.0043,
|
|
"num_tokens": 163422229.0,
|
|
"reward": 0.9934585094451904,
|
|
"reward_std": 0.1226073071360588,
|
|
"rewards/accuracy_reward": 0.6869791746139526,
|
|
"rewards/brier_reward": 0.7851580858230591,
|
|
"rewards/confidence_uniqueness_reward": 0.9424677133560181,
|
|
"rewards/format_reward": 0.9941840291023254,
|
|
"rewards/frontier_aurc_reward": -0.0016598706366494297,
|
|
"rewards/frontier_coverage_0": -0.004633589053992182,
|
|
"rewards/frontier_coverage_1": -0.004633589053992182,
|
|
"rewards/frontier_coverage_10": -0.004633589053992182,
|
|
"rewards/frontier_coverage_15": -0.004633589053992182,
|
|
"rewards/frontier_coverage_20": -0.004633589053992182,
|
|
"rewards/frontier_coverage_25": -0.004633589053992182,
|
|
"rewards/frontier_coverage_5": -0.004633589053992182,
|
|
"rewards/frontier_ece_reward": 0.02196214161813259,
|
|
"rewards/frontier_entropy_batch_reward": -0.21655711829662322,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16119791865348815,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19965277777777776,
|
|
"signal/accuracy_reward/group_std_mean": 0.2105330467224121,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08059895932674407,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08059895932674407,
|
|
"signal/advantage_abs_mean": 0.0923902839422226,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0923902839422226,
|
|
"signal/advantage_pre_scale_std": 0.14465830028057097,
|
|
"signal/advantage_std": 0.14465830028057097,
|
|
"signal/brier_reward/centered_abs_mean": 0.1523455262184143,
|
|
"signal/brier_reward/group_bin_occupancy": 0.851388888888889,
|
|
"signal/brier_reward/group_std_mean": 0.1943160504102707,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015234552882611751,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015234552882611751,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02421053908765316,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8864583333333332,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.038174081966280936,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024210539646446704,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024210539646446704,
|
|
"signal/format_reward/centered_abs_mean": 0.01048719622194767,
|
|
"signal/format_reward/group_bin_occupancy": 0.13645833333333332,
|
|
"signal/format_reward/group_std_mean": 0.021353743970394135,
|
|
"signal/format_reward/group_zero_std_frac": 0.9083333492279053,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005243598110973835,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005243598110973835,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015173830557614564,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6986111111111112,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0024198783095926045,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8967288997373544e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8967288997373544e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19244979321956635,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8506944444444444,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2510490626096725,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024056224152445792,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024056224152445792,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19244979321956635,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8506944444444444,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2510490626096725,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024056224152445792,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024056224152445792,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19244979321956635,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8506944444444444,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2510490626096725,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024056224152445792,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024056224152445792,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19244979321956635,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8506944444444444,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2510490626096725,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024056224152445792,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024056224152445792,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19244979321956635,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8506944444444444,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2510490626096725,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024056224152445792,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024056224152445792,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19244979321956635,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8506944444444444,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2510490626096725,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024056224152445792,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024056224152445792,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19244979321956635,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8506944444444444,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2510490626096725,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024056224152445792,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024056224152445792,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.052489711344242095,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7017361111111111,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06726017668843269,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005248971004039049,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005248971004039049,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2638679683208466,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.8125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3345774471759796,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026386797800660132,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026386797800660132,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1771991198328447,
|
|
"calibration/batch_distribution_entropy": 0.96243817415222,
|
|
"calibration/batch_entropy_100bins": 0.9507006712592847,
|
|
"calibration/batch_entropy_10bins": 0.96243817415222,
|
|
"calibration/batch_entropy_50bins": 0.964868282268785,
|
|
"calibration/batch_uniqueness": 0.9487259176376346,
|
|
"calibration/buffer_distribution_entropy": 0.9187161356306482,
|
|
"calibration/buffer_entropy_100bins": 0.9165996492677836,
|
|
"calibration/buffer_entropy_10bins": 0.9187161356306482,
|
|
"calibration/buffer_entropy_50bins": 0.9323867334705161,
|
|
"calibration/confidence_entropy": 0.5093906412982893,
|
|
"calibration/coverage@0%": 0.048757301233618,
|
|
"calibration/coverage@1%": 0.048757301233618,
|
|
"calibration/coverage@10%": 0.306309071579122,
|
|
"calibration/coverage@15%": 0.4915969045669811,
|
|
"calibration/coverage@20%": 0.603687003183948,
|
|
"calibration/coverage@25%": 0.780469418411813,
|
|
"calibration/coverage@30%": 0.8668421125340184,
|
|
"calibration/coverage@5%": 0.08637593441360888,
|
|
"calibration/ece": 0.16357922386808874,
|
|
"calibration/mean_confidence": 0.5690209591292246,
|
|
"calibration/prompt_uniqueness": 0.8674022167680248,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0064236111111110935,
|
|
"completions/max_length": 3078.6,
|
|
"completions/max_terminated_length": 3078.6,
|
|
"completions/mean_length": 640.9775268554688,
|
|
"completions/mean_terminated_length": 645.139404296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 178.6,
|
|
"epoch": 0.2159973000337496,
|
|
"grad_norm": 0.0004924891400150955,
|
|
"learning_rate": 3.5542168674698798e-06,
|
|
"loss": -0.0054,
|
|
"num_tokens": 173874962.0,
|
|
"reward": 0.9923341274261475,
|
|
"reward_std": 0.12305669635534286,
|
|
"rewards/accuracy_reward": 0.6828993082046508,
|
|
"rewards/brier_reward": 0.777597713470459,
|
|
"rewards/confidence_uniqueness_reward": 0.9433488249778748,
|
|
"rewards/format_reward": 0.9934895753860473,
|
|
"rewards/frontier_aurc_reward": -0.0015883626649156213,
|
|
"rewards/frontier_coverage_0": -0.00989127003122121,
|
|
"rewards/frontier_coverage_1": -0.00989127003122121,
|
|
"rewards/frontier_coverage_10": -0.00989127003122121,
|
|
"rewards/frontier_coverage_15": -0.00989127003122121,
|
|
"rewards/frontier_coverage_20": -0.00989127003122121,
|
|
"rewards/frontier_coverage_25": -0.00989127003122121,
|
|
"rewards/frontier_coverage_5": -0.00989127003122121,
|
|
"rewards/frontier_ece_reward": 0.018967508152127267,
|
|
"rewards/frontier_entropy_batch_reward": -0.18966446816921234,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16197374165058137,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19791666666666669,
|
|
"signal/accuracy_reward/group_std_mean": 0.20919720828533173,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08098687082529069,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08098687082529069,
|
|
"signal/advantage_abs_mean": 0.09262900650501252,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09262900650501252,
|
|
"signal/advantage_pre_scale_std": 0.14502845108509063,
|
|
"signal/advantage_std": 0.14502845108509063,
|
|
"signal/brier_reward/centered_abs_mean": 0.15718668401241304,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8552083333333333,
|
|
"signal/brier_reward/group_std_mean": 0.1992782771587372,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01571866814047098,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01571866814047098,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024519116804003715,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.88125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03934002220630646,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024519118014723063,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024519118014723063,
|
|
"signal/format_reward/centered_abs_mean": 0.011572265625,
|
|
"signal/format_reward/group_bin_occupancy": 0.1378472222222222,
|
|
"signal/format_reward/group_std_mean": 0.023632752522826195,
|
|
"signal/format_reward/group_zero_std_frac": 0.8972222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0057861328125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0057861328125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014947153860703111,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6850694444444445,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023850529454648496,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.868394247139804e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.868394247139804e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1994914710521698,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8565972222222221,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.25926323533058165,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024936434347182512,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024936434347182512,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1994914710521698,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8565972222222221,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.25926323533058165,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024936434347182512,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024936434347182512,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1994914710521698,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8565972222222221,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.25926323533058165,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024936434347182512,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024936434347182512,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1994914710521698,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8565972222222221,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.25926323533058165,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024936434347182512,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024936434347182512,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1994914710521698,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8565972222222221,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.25926323533058165,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024936434347182512,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024936434347182512,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1994914710521698,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8565972222222221,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.25926323533058165,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024936434347182512,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024936434347182512,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1994914710521698,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8565972222222221,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.25926323533058165,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024936434347182512,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024936434347182512,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.050507232546806335,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6986111111111111,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06456724032759667,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005050723347812891,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005050723347812891,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25887452661991117,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7711805555555555,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3302801251411438,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02588745318353176,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02588745318353176,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21387456839353614,
|
|
"calibration/batch_distribution_entropy": 0.9823557824251997,
|
|
"calibration/batch_entropy_100bins": 0.9648278783784832,
|
|
"calibration/batch_entropy_10bins": 0.9823557824251997,
|
|
"calibration/batch_entropy_50bins": 0.9772968018292392,
|
|
"calibration/batch_uniqueness": 0.9534788717971289,
|
|
"calibration/buffer_distribution_entropy": 0.9254646438274845,
|
|
"calibration/buffer_entropy_100bins": 0.9237693855572863,
|
|
"calibration/buffer_entropy_10bins": 0.9254646438274845,
|
|
"calibration/buffer_entropy_50bins": 0.9382846474644742,
|
|
"calibration/confidence_entropy": 0.4893090729142555,
|
|
"calibration/coverage@0%": 0.06979967507118581,
|
|
"calibration/coverage@1%": 0.10558914875539635,
|
|
"calibration/coverage@10%": 0.27119950973638285,
|
|
"calibration/coverage@15%": 0.5634953097731239,
|
|
"calibration/coverage@20%": 0.6151696116928447,
|
|
"calibration/coverage@25%": 0.654349367364747,
|
|
"calibration/coverage@30%": 0.7034767670157068,
|
|
"calibration/coverage@5%": 0.18697819073206579,
|
|
"calibration/ece": 0.20191279563686187,
|
|
"calibration/mean_confidence": 0.5199859337619741,
|
|
"calibration/prompt_uniqueness": 0.8663527153909893,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.004253472222222232,
|
|
"completions/max_length": 3003.0,
|
|
"completions/max_terminated_length": 3003.0,
|
|
"completions/mean_length": 630.5767456054688,
|
|
"completions/mean_terminated_length": 633.2605712890625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 162.4,
|
|
"epoch": 0.22799715003562457,
|
|
"grad_norm": 0.00040928201633505523,
|
|
"learning_rate": 3.4036144578313257e-06,
|
|
"loss": -0.0023,
|
|
"num_tokens": 184230886.0,
|
|
"reward": 0.9884204149246216,
|
|
"reward_std": 0.1153764232993126,
|
|
"rewards/accuracy_reward": 0.6644097328186035,
|
|
"rewards/brier_reward": 0.7661008715629578,
|
|
"rewards/confidence_uniqueness_reward": 0.9478963613510132,
|
|
"rewards/format_reward": 0.9957465291023254,
|
|
"rewards/frontier_aurc_reward": -0.0016027359291911126,
|
|
"rewards/frontier_coverage_0": -0.002308785542845726,
|
|
"rewards/frontier_coverage_1": -0.002308785542845726,
|
|
"rewards/frontier_coverage_10": -0.002308785542845726,
|
|
"rewards/frontier_coverage_15": -0.002308785542845726,
|
|
"rewards/frontier_coverage_20": -0.002308785542845726,
|
|
"rewards/frontier_coverage_25": -0.002308785542845726,
|
|
"rewards/frontier_coverage_5": -0.002308785542845726,
|
|
"rewards/frontier_ece_reward": 0.019070269353687764,
|
|
"rewards/frontier_entropy_batch_reward": -0.14742431938648223,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14867621660232544,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.196875,
|
|
"signal/accuracy_reward/group_std_mean": 0.199493145942688,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4250000059604645,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07433810830116272,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07433810830116272,
|
|
"signal/advantage_abs_mean": 0.08577116578817368,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08577116578817368,
|
|
"signal/advantage_pre_scale_std": 0.13571085333824157,
|
|
"signal/advantage_std": 0.13571085333824157,
|
|
"signal/brier_reward/centered_abs_mean": 0.16521736681461335,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8541666666666667,
|
|
"signal/brier_reward/group_std_mean": 0.20886048674583435,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016521737165749072,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016521737165749072,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01989307664334774,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.884375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03198789656162262,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001989307696931064,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001989307696931064,
|
|
"signal/format_reward/centered_abs_mean": 0.007590060774236918,
|
|
"signal/format_reward/group_bin_occupancy": 0.13506944444444444,
|
|
"signal/format_reward/group_std_mean": 0.01698396187275648,
|
|
"signal/format_reward/group_zero_std_frac": 0.919444453716278,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003795030387118459,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.003795030387118459,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001562464004382491,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6795138888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0025261019822210074,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.953080100065563e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.953080100065563e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2118411511182785,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8520833333333332,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2758490860462189,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026480144821107387,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026480144821107387,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2118411511182785,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8520833333333332,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2758490860462189,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026480144821107387,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026480144821107387,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2118411511182785,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8520833333333332,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2758490860462189,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026480144821107387,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026480144821107387,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2118411511182785,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8520833333333332,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2758490860462189,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026480144821107387,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026480144821107387,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2118411511182785,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8520833333333332,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2758490860462189,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026480144821107387,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026480144821107387,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2118411511182785,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8520833333333332,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2758490860462189,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026480144821107387,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026480144821107387,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2118411511182785,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8520833333333332,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2758490860462189,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026480144821107387,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026480144821107387,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05060374662280083,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7100694444444444,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06428168565034867,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0050603746436536316,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0050603746436536316,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22136266827583312,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.763888888888889,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.29468963146209715,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022136268392205238,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022136268392205238,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.167972204724354,
|
|
"calibration/batch_distribution_entropy": 0.9864342846502329,
|
|
"calibration/batch_entropy_100bins": 0.9650832272289168,
|
|
"calibration/batch_entropy_10bins": 0.9864342846502329,
|
|
"calibration/batch_entropy_50bins": 0.9783947242346824,
|
|
"calibration/batch_uniqueness": 0.953848941281708,
|
|
"calibration/buffer_distribution_entropy": 0.9315273864669636,
|
|
"calibration/buffer_entropy_100bins": 0.9301545524945812,
|
|
"calibration/buffer_entropy_10bins": 0.9315273864669636,
|
|
"calibration/buffer_entropy_50bins": 0.9434719750608054,
|
|
"calibration/confidence_entropy": 0.5021417689027668,
|
|
"calibration/coverage@0%": 0.030512946582513044,
|
|
"calibration/coverage@1%": 0.030512946582513044,
|
|
"calibration/coverage@10%": 0.34233461774347035,
|
|
"calibration/coverage@15%": 0.47939107668565634,
|
|
"calibration/coverage@20%": 0.7026149703671164,
|
|
"calibration/coverage@25%": 0.8276201440877331,
|
|
"calibration/coverage@30%": 0.8976031607885394,
|
|
"calibration/coverage@5%": 0.11523160748304304,
|
|
"calibration/ece": 0.17367782749954952,
|
|
"calibration/mean_confidence": 0.5388179797320996,
|
|
"calibration/prompt_uniqueness": 0.8750848836612375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.008333333333333326,
|
|
"completions/max_length": 2917.4,
|
|
"completions/max_terminated_length": 2917.4,
|
|
"completions/mean_length": 626.88291015625,
|
|
"completions/mean_terminated_length": 632.1784790039062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 188.8,
|
|
"epoch": 0.23999700003749952,
|
|
"grad_norm": 0.0003905615594703704,
|
|
"learning_rate": 3.2530120481927713e-06,
|
|
"loss": -0.0055,
|
|
"num_tokens": 194551649.0,
|
|
"reward": 0.9936325430870057,
|
|
"reward_std": 0.12192367911338806,
|
|
"rewards/accuracy_reward": 0.6775173544883728,
|
|
"rewards/brier_reward": 0.7730206847190857,
|
|
"rewards/confidence_uniqueness_reward": 0.9442116141319274,
|
|
"rewards/format_reward": 0.9916666746139526,
|
|
"rewards/frontier_aurc_reward": -0.0014509693486616016,
|
|
"rewards/frontier_coverage_0": -8.213166147470474e-05,
|
|
"rewards/frontier_coverage_1": -8.213166147470474e-05,
|
|
"rewards/frontier_coverage_10": -8.213166147470474e-05,
|
|
"rewards/frontier_coverage_15": -8.213166147470474e-05,
|
|
"rewards/frontier_coverage_20": -8.213166147470474e-05,
|
|
"rewards/frontier_coverage_25": -8.213166147470474e-05,
|
|
"rewards/frontier_coverage_5": -8.213166147470474e-05,
|
|
"rewards/frontier_ece_reward": 0.01913320329040289,
|
|
"rewards/frontier_entropy_batch_reward": -0.14570734947919844,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16055229902267457,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19930555555555557,
|
|
"signal/accuracy_reward/group_std_mean": 0.21070023775100707,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08027614951133728,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08027614951133728,
|
|
"signal/advantage_abs_mean": 0.090542633831501,
|
|
"signal/advantage_pre_scale_abs_mean": 0.090542633831501,
|
|
"signal/advantage_pre_scale_std": 0.14465495347976684,
|
|
"signal/advantage_std": 0.14465495347976684,
|
|
"signal/brier_reward/centered_abs_mean": 0.16810146272182463,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8604166666666666,
|
|
"signal/brier_reward/group_std_mean": 0.21120634078979492,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016810146719217302,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016810146719217302,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024970437213778496,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8760416666666666,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.040504425019025805,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024970436468720438,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024970436468720438,
|
|
"signal/format_reward/centered_abs_mean": 0.01360677070915699,
|
|
"signal/format_reward/group_bin_occupancy": 0.13923611111111112,
|
|
"signal/format_reward/group_std_mean": 0.026751523464918138,
|
|
"signal/format_reward/group_zero_std_frac": 0.8861111164093017,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006803385354578495,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006803385354578495,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001497122971341014,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.679513888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0024236575700342655,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.871403837867547e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.871403837867547e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.22072286307811737,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8427083333333332,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.28572168946266174,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027590358164161443,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027590358164161443,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22072286307811737,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8427083333333332,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.28572168946266174,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027590358164161443,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027590358164161443,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22072286307811737,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8427083333333332,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.28572168946266174,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027590358164161443,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027590358164161443,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22072286307811737,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8427083333333332,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.28572168946266174,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027590358164161443,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027590358164161443,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22072286307811737,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8427083333333332,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.28572168946266174,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027590358164161443,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027590358164161443,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22072286307811737,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8427083333333332,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.28572168946266174,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027590358164161443,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027590358164161443,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22072286307811737,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8427083333333332,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.28572168946266174,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027590358164161443,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027590358164161443,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04990529865026474,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7184027777777777,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06357247680425644,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004990530014038086,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004990530014038086,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22153306305408477,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7583333333333333,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2920175909996033,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022153307124972342,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022153307124972342,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.23999700003749952,
|
|
"eval_calibration/aurc": 0.1696630845347579,
|
|
"eval_calibration/batch_distribution_entropy": 0.9338246735522308,
|
|
"eval_calibration/batch_entropy_100bins": 0.706892459061427,
|
|
"eval_calibration/batch_entropy_10bins": 0.9338246735522308,
|
|
"eval_calibration/batch_entropy_50bins": 0.7818401869433624,
|
|
"eval_calibration/batch_uniqueness": 0.8962181321540061,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9351777755636745,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9338170230335757,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9351777755636745,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9465654691196751,
|
|
"eval_calibration/confidence_entropy": 0.47687795562464963,
|
|
"eval_calibration/coverage@0%": 0.21908602150537634,
|
|
"eval_calibration/coverage@1%": 0.21908602150537634,
|
|
"eval_calibration/coverage@10%": 0.401377688172043,
|
|
"eval_calibration/coverage@15%": 0.4847110215053763,
|
|
"eval_calibration/coverage@20%": 0.6584341397849462,
|
|
"eval_calibration/coverage@25%": 0.8929771505376344,
|
|
"eval_calibration/coverage@30%": 0.946236559139785,
|
|
"eval_calibration/coverage@5%": 0.276377688172043,
|
|
"eval_calibration/ece": 0.23803624165826145,
|
|
"eval_calibration/mean_confidence": 0.569122975052372,
|
|
"eval_calibration/prompt_uniqueness": 0.8962181321540061,
|
|
"eval_completions/clipped_ratio": 0.010416666666666666,
|
|
"eval_completions/max_length": 2299.3333333333335,
|
|
"eval_completions/max_terminated_length": 2299.3333333333335,
|
|
"eval_completions/mean_length": 617.4854431152344,
|
|
"eval_completions/mean_terminated_length": 624.0654602050781,
|
|
"eval_completions/min_length": 45.333333333333336,
|
|
"eval_completions/min_terminated_length": 213.66666666666666,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 194551649.0,
|
|
"eval_reward": 0.9322001536687216,
|
|
"eval_reward_std": 0.23809615274270376,
|
|
"eval_rewards/accuracy_reward": 0.6710069477558136,
|
|
"eval_rewards/brier_reward": 0.7738438149293264,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8830358386039734,
|
|
"eval_rewards/format_reward": 0.9869791766007742,
|
|
"eval_rewards/frontier_aurc_reward": -0.0014650731851967673,
|
|
"eval_rewards/frontier_coverage_0": 0.00044721147666374844,
|
|
"eval_rewards/frontier_coverage_1": 0.00044721147666374844,
|
|
"eval_rewards/frontier_coverage_10": 0.00044721147666374844,
|
|
"eval_rewards/frontier_coverage_15": 0.00044721147666374844,
|
|
"eval_rewards/frontier_coverage_20": 0.00044721147666374844,
|
|
"eval_rewards/frontier_coverage_25": 0.00044721147666374844,
|
|
"eval_rewards/frontier_coverage_5": 0.00044721147666374844,
|
|
"eval_rewards/frontier_ece_reward": 0.01968886749818921,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.6447059710820516,
|
|
"eval_runtime": 205.8691,
|
|
"eval_samples_per_second": 4.857,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4316948801279068,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4710538685321808,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2158474400639534,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2158474400639534,
|
|
"eval_signal/advantage_abs_mean": 0.20248722285032272,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20248722285032272,
|
|
"eval_signal/advantage_pre_scale_std": 0.23723148057858148,
|
|
"eval_signal/advantage_std": 0.23723148057858148,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.20928792655467987,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.8854166666666666,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2662544945875804,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020928792965908844,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.020928792965908844,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.054847310607632004,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.40625,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09344139198462169,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005484731014197071,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005484731014197071,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.025010850590964157,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.17013888888888887,
|
|
"eval_signal/format_reward/group_std_mean": 0.06767813861370087,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.6388889104127884,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.012505425295482079,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.012505425295482079,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0020411182777024806,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6631944444444445,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0037244935131942234,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.551397907761081e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.551397907761081e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2758402054508527,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.39653781056404114,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00344800246724238,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00344800246724238,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.2758402054508527,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.39653781056404114,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00344800246724238,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00344800246724238,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.2758402054508527,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.39653781056404114,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00344800246724238,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00344800246724238,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.2758402054508527,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.39653781056404114,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00344800246724238,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00344800246724238,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2758402054508527,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.39653781056404114,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00344800246724238,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00344800246724238,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2758402054508527,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.39653781056404114,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00344800246724238,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00344800246724238,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2758402054508527,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.39653781056404114,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00344800246724238,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00344800246724238,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.05408057694633802,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9097222222222223,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.0697250347584486,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005408057787766059,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005408057787766059,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3219749679168065,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2951388888888889,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.33611299594243366,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032197498405973114,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032197498405973114,
|
|
"eval_steps_per_second": 0.029,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28984463916978553,
|
|
"calibration/batch_distribution_entropy": 0.9711874364190758,
|
|
"calibration/batch_entropy_100bins": 0.9559796276599972,
|
|
"calibration/batch_entropy_10bins": 0.9711874364190758,
|
|
"calibration/batch_entropy_50bins": 0.967740156453336,
|
|
"calibration/batch_uniqueness": 0.9505796993365717,
|
|
"calibration/buffer_distribution_entropy": 0.936615480617325,
|
|
"calibration/buffer_entropy_100bins": 0.9357953238378386,
|
|
"calibration/buffer_entropy_10bins": 0.936615480617325,
|
|
"calibration/buffer_entropy_50bins": 0.9480214843525481,
|
|
"calibration/confidence_entropy": 0.5001631137185123,
|
|
"calibration/coverage@0%": 0.01958675450744108,
|
|
"calibration/coverage@1%": 0.01958675450744108,
|
|
"calibration/coverage@10%": 0.16792198551812815,
|
|
"calibration/coverage@15%": 0.18639011882766576,
|
|
"calibration/coverage@20%": 0.2856551133219956,
|
|
"calibration/coverage@25%": 0.3307244292133193,
|
|
"calibration/coverage@30%": 0.48409656797179723,
|
|
"calibration/coverage@5%": 0.1408980940299875,
|
|
"calibration/ece": 0.14950855344866051,
|
|
"calibration/mean_confidence": 0.5798324340804183,
|
|
"calibration/prompt_uniqueness": 0.8621616643105219,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010416666666666675,
|
|
"completions/max_length": 3329.6,
|
|
"completions/max_terminated_length": 3329.6,
|
|
"completions/mean_length": 630.7128662109375,
|
|
"completions/mean_terminated_length": 637.3713989257812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 172.6,
|
|
"epoch": 0.2519968500393745,
|
|
"grad_norm": 0.0003931356477551162,
|
|
"learning_rate": 3.1024096385542172e-06,
|
|
"loss": -0.0083,
|
|
"num_tokens": 204894325.0,
|
|
"reward": 0.9878118515014649,
|
|
"reward_std": 0.12439936995506287,
|
|
"rewards/accuracy_reward": 0.6724826335906983,
|
|
"rewards/brier_reward": 0.7758374571800232,
|
|
"rewards/confidence_uniqueness_reward": 0.9409982562065125,
|
|
"rewards/format_reward": 0.9895833373069763,
|
|
"rewards/frontier_aurc_reward": -0.0015477648237720131,
|
|
"rewards/frontier_coverage_0": -0.00018841465935111047,
|
|
"rewards/frontier_coverage_1": -0.00018841465935111047,
|
|
"rewards/frontier_coverage_10": -0.00018841465935111047,
|
|
"rewards/frontier_coverage_15": -0.00018841465935111047,
|
|
"rewards/frontier_coverage_20": -0.00018841465935111047,
|
|
"rewards/frontier_coverage_25": -0.00018841465935111047,
|
|
"rewards/frontier_coverage_5": -0.00018841465935111047,
|
|
"rewards/frontier_ece_reward": 0.017811648175120355,
|
|
"rewards/frontier_entropy_batch_reward": -0.16650085747241974,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1506781682372093,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19965277777777776,
|
|
"signal/accuracy_reward/group_std_mean": 0.20425570011138916,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4027777910232544,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07533908411860465,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07533908411860465,
|
|
"signal/advantage_abs_mean": 0.09059339612722397,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09059339612722397,
|
|
"signal/advantage_pre_scale_std": 0.14705823063850404,
|
|
"signal/advantage_std": 0.14705823063850404,
|
|
"signal/brier_reward/centered_abs_mean": 0.1585765987634659,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85,
|
|
"signal/brier_reward/group_std_mean": 0.20116137266159057,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015857660584151743,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015857660584151743,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028387091308832168,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8545138888888889,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.046965491771697995,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002838709158822894,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002838709158822894,
|
|
"signal/format_reward/centered_abs_mean": 0.016764323227107525,
|
|
"signal/format_reward/group_bin_occupancy": 0.14270833333333335,
|
|
"signal/format_reward/group_std_mean": 0.03298989050090313,
|
|
"signal/format_reward/group_zero_std_frac": 0.8583333373069764,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008382161613553762,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008382161613553762,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001613885280676186,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6847222222222223,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0026253133080899717,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.017356746364385e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.017356746364385e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19782112836837767,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2575752854347229,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024727642070502044,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024727642070502044,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19782112836837767,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2575752854347229,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024727642070502044,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024727642070502044,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19782112836837767,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2575752854347229,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024727642070502044,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024727642070502044,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19782112836837767,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2575752854347229,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024727642070502044,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024727642070502044,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19782112836837767,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2575752854347229,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024727642070502044,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024727642070502044,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19782112836837767,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2575752854347229,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024727642070502044,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024727642070502044,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19782112836837767,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2575752854347229,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024727642070502044,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024727642070502044,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04588953480124473,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7059027777777779,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.058351149410009386,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004588953498750925,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004588953498750925,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2355831891298294,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7725694444444444,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3069721281528473,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023558317869901656,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023558317869901656,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16567868451536366,
|
|
"calibration/batch_distribution_entropy": 0.9527322950855959,
|
|
"calibration/batch_entropy_100bins": 0.947479658399056,
|
|
"calibration/batch_entropy_10bins": 0.9527322950855959,
|
|
"calibration/batch_entropy_50bins": 0.9567347662661371,
|
|
"calibration/batch_uniqueness": 0.9478300016023,
|
|
"calibration/buffer_distribution_entropy": 0.9392886191298354,
|
|
"calibration/buffer_entropy_100bins": 0.9397206217856423,
|
|
"calibration/buffer_entropy_10bins": 0.9392886191298354,
|
|
"calibration/buffer_entropy_50bins": 0.9508547762112409,
|
|
"calibration/confidence_entropy": 0.5005231604018499,
|
|
"calibration/coverage@0%": 0.03288614870450616,
|
|
"calibration/coverage@1%": 0.03288614870450616,
|
|
"calibration/coverage@10%": 0.3226409737961885,
|
|
"calibration/coverage@15%": 0.477594869979861,
|
|
"calibration/coverage@20%": 0.6694523645087733,
|
|
"calibration/coverage@25%": 0.7880677437475745,
|
|
"calibration/coverage@30%": 0.9082701187335092,
|
|
"calibration/coverage@5%": 0.14515768917448005,
|
|
"calibration/ece": 0.13138400877437387,
|
|
"calibration/mean_confidence": 0.6028444186351783,
|
|
"calibration/prompt_uniqueness": 0.8709859308023828,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010069444444444419,
|
|
"completions/max_length": 3546.2,
|
|
"completions/max_terminated_length": 3546.2,
|
|
"completions/mean_length": 640.7478271484375,
|
|
"completions/mean_terminated_length": 647.2959106445312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 192.6,
|
|
"epoch": 0.2639967000412495,
|
|
"grad_norm": 0.0004377875302452594,
|
|
"learning_rate": 2.9518072289156627e-06,
|
|
"loss": -0.009,
|
|
"num_tokens": 215384188.0,
|
|
"reward": 1.001729953289032,
|
|
"reward_std": 0.1265808016061783,
|
|
"rewards/accuracy_reward": 0.7092881917953491,
|
|
"rewards/brier_reward": 0.7818510174751282,
|
|
"rewards/confidence_uniqueness_reward": 0.9397328972816468,
|
|
"rewards/format_reward": 0.9899305582046509,
|
|
"rewards/frontier_aurc_reward": -0.001325283572077751,
|
|
"rewards/frontier_coverage_0": -0.02074230033904314,
|
|
"rewards/frontier_coverage_1": -0.02074230033904314,
|
|
"rewards/frontier_coverage_10": -0.02074230033904314,
|
|
"rewards/frontier_coverage_15": -0.02074230033904314,
|
|
"rewards/frontier_coverage_20": -0.02074230033904314,
|
|
"rewards/frontier_coverage_25": -0.02074230033904314,
|
|
"rewards/frontier_coverage_5": -0.02074230033904314,
|
|
"rewards/frontier_ece_reward": 0.01406394112855196,
|
|
"rewards/frontier_entropy_batch_reward": -0.19612716436386107,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15444336235523223,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20381944444444441,
|
|
"signal/accuracy_reward/group_std_mean": 0.21105689704418182,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07722168117761612,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07722168117761612,
|
|
"signal/advantage_abs_mean": 0.0925620898604393,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0925620898604393,
|
|
"signal/advantage_pre_scale_std": 0.15025562345981597,
|
|
"signal/advantage_std": 0.15025562345981597,
|
|
"signal/brier_reward/centered_abs_mean": 0.15158471167087556,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8395833333333332,
|
|
"signal/brier_reward/group_std_mean": 0.19477559626102448,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015158471278846264,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015158471278846264,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02864648588001728,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8680555555555556,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.045315783470869064,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002864648727700114,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002864648727700114,
|
|
"signal/format_reward/centered_abs_mean": 0.01662326380610466,
|
|
"signal/format_reward/group_bin_occupancy": 0.14097222222222222,
|
|
"signal/format_reward/group_std_mean": 0.03081248588860035,
|
|
"signal/format_reward/group_zero_std_frac": 0.8722222208976745,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00831163190305233,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00831163190305233,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013894882751628757,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.679513888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00229809598531574,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7368603403156156e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7368603403156156e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1891954332590103,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8295138888888889,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24967995285987854,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002364942990243435,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002364942990243435,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1891954332590103,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8295138888888889,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24967995285987854,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002364942990243435,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002364942990243435,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1891954332590103,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8295138888888889,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24967995285987854,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002364942990243435,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002364942990243435,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1891954332590103,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8295138888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24967995285987854,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002364942990243435,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002364942990243435,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1891954332590103,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8295138888888889,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24967995285987854,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002364942990243435,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002364942990243435,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1891954332590103,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8295138888888889,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24967995285987854,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002364942990243435,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002364942990243435,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1891954332590103,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8295138888888889,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24967995285987854,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002364942990243435,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002364942990243435,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.041539561748504636,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7034722222222223,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05418416783213616,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0041539563797414305,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0041539563797414305,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2623390406370163,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7722222222222223,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.335136216878891,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026233907043933868,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026233907043933868,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29473056838071987,
|
|
"calibration/batch_distribution_entropy": 0.9619916322012759,
|
|
"calibration/batch_entropy_100bins": 0.9549874064799202,
|
|
"calibration/batch_entropy_10bins": 0.9619916322012759,
|
|
"calibration/batch_entropy_50bins": 0.9647061223514687,
|
|
"calibration/batch_uniqueness": 0.9495682126495403,
|
|
"calibration/buffer_distribution_entropy": 0.9414297817272864,
|
|
"calibration/buffer_entropy_100bins": 0.9430816398527859,
|
|
"calibration/buffer_entropy_10bins": 0.9414297817272864,
|
|
"calibration/buffer_entropy_50bins": 0.9532562123434823,
|
|
"calibration/confidence_entropy": 0.497030786564154,
|
|
"calibration/coverage@0%": 0.013913364638495167,
|
|
"calibration/coverage@1%": 0.013913364638495167,
|
|
"calibration/coverage@10%": 0.040717969175169524,
|
|
"calibration/coverage@15%": 0.11740594901471416,
|
|
"calibration/coverage@20%": 0.3688631313189874,
|
|
"calibration/coverage@25%": 0.47341993382008873,
|
|
"calibration/coverage@30%": 0.5718340073342899,
|
|
"calibration/coverage@5%": 0.013913364638495167,
|
|
"calibration/ece": 0.18056783838522902,
|
|
"calibration/mean_confidence": 0.5710495335219846,
|
|
"calibration/prompt_uniqueness": 0.8647219156071875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014930555555555535,
|
|
"completions/max_length": 3417.4,
|
|
"completions/max_terminated_length": 3417.4,
|
|
"completions/mean_length": 639.9868041992188,
|
|
"completions/mean_terminated_length": 649.7999755859375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 185.6,
|
|
"epoch": 0.27599655004312446,
|
|
"grad_norm": 0.00040515753789804876,
|
|
"learning_rate": 2.8012048192771087e-06,
|
|
"loss": -0.0125,
|
|
"num_tokens": 225836036.0,
|
|
"reward": 0.9746179699897766,
|
|
"reward_std": 0.13332333266735077,
|
|
"rewards/accuracy_reward": 0.6622395753860474,
|
|
"rewards/brier_reward": 0.7698838829994201,
|
|
"rewards/confidence_uniqueness_reward": 0.9336004853248596,
|
|
"rewards/format_reward": 0.9848090410232544,
|
|
"rewards/frontier_aurc_reward": -0.0017310404684394598,
|
|
"rewards/frontier_coverage_0": 0.004687186796218157,
|
|
"rewards/frontier_coverage_1": 0.004687186796218157,
|
|
"rewards/frontier_coverage_10": 0.004687186796218157,
|
|
"rewards/frontier_coverage_15": 0.004687186796218157,
|
|
"rewards/frontier_coverage_20": 0.004687186796218157,
|
|
"rewards/frontier_coverage_25": 0.004687186796218157,
|
|
"rewards/frontier_coverage_5": 0.004687186796218157,
|
|
"rewards/frontier_ece_reward": 0.016754307225346564,
|
|
"rewards/frontier_entropy_batch_reward": -0.213187313079834,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15972764790058136,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.196875,
|
|
"signal/accuracy_reward/group_std_mean": 0.206351837515831,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4250000059604645,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07986382395029068,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07986382395029068,
|
|
"signal/advantage_abs_mean": 0.09935903698205947,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09935903698205947,
|
|
"signal/advantage_pre_scale_std": 0.1625169038772583,
|
|
"signal/advantage_std": 0.1625169038772583,
|
|
"signal/brier_reward/centered_abs_mean": 0.16147418916225434,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8649305555555555,
|
|
"signal/brier_reward/group_std_mean": 0.20425305664539337,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016147419437766077,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016147419437766077,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.036166596412658694,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8284722222222222,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05944165885448456,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003616659576073289,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003616659576073289,
|
|
"signal/format_reward/centered_abs_mean": 0.02408311627805233,
|
|
"signal/format_reward/group_bin_occupancy": 0.1482638888888889,
|
|
"signal/format_reward/group_std_mean": 0.04518638737499714,
|
|
"signal/format_reward/group_zero_std_frac": 0.8138888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012041558139026165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012041558139026165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017531340941786765,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6899305555555555,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002778572216629982,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1914176249993035e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1914176249993035e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19187724888324736,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8548611111111111,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.25314462184906006,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023984656669199466,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023984656669199466,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19187724888324736,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8548611111111111,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.25314462184906006,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023984656669199466,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023984656669199466,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19187724888324736,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8548611111111111,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.25314462184906006,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023984656669199466,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023984656669199466,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19187724888324736,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8548611111111111,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.25314462184906006,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023984656669199466,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023984656669199466,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19187724888324736,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8548611111111111,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.25314462184906006,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023984656669199466,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023984656669199466,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19187724888324736,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8548611111111111,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.25314462184906006,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023984656669199466,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023984656669199466,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19187724888324736,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8548611111111111,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.25314462184906006,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023984656669199466,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023984656669199466,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04284475669264794,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6947916666666667,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.054882925003767014,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004284475743770599,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004284475743770599,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2705032885074615,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7972222222222222,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33811612129211427,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027050328627228736,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027050328627228736,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2692657849205195,
|
|
"calibration/batch_distribution_entropy": 0.9659327417988296,
|
|
"calibration/batch_entropy_100bins": 0.9541108697407532,
|
|
"calibration/batch_entropy_10bins": 0.9659327417988296,
|
|
"calibration/batch_entropy_50bins": 0.9666704637716524,
|
|
"calibration/batch_uniqueness": 0.9494544866872708,
|
|
"calibration/buffer_distribution_entropy": 0.9450548596655558,
|
|
"calibration/buffer_entropy_100bins": 0.946823957265584,
|
|
"calibration/buffer_entropy_10bins": 0.9450548596655558,
|
|
"calibration/buffer_entropy_50bins": 0.9562487948463927,
|
|
"calibration/confidence_entropy": 0.4875023680573502,
|
|
"calibration/coverage@0%": 0.017792988425402438,
|
|
"calibration/coverage@1%": 0.017792988425402438,
|
|
"calibration/coverage@10%": 0.2124956955037931,
|
|
"calibration/coverage@15%": 0.4193426391829937,
|
|
"calibration/coverage@20%": 0.5009450510714555,
|
|
"calibration/coverage@25%": 0.5343342250707565,
|
|
"calibration/coverage@30%": 0.566149934383202,
|
|
"calibration/coverage@5%": 0.05855147063862448,
|
|
"calibration/ece": 0.17746895129703658,
|
|
"calibration/mean_confidence": 0.5736739808286649,
|
|
"calibration/prompt_uniqueness": 0.865164092609454,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012326388888888906,
|
|
"completions/max_length": 3293.2,
|
|
"completions/max_terminated_length": 3293.2,
|
|
"completions/mean_length": 630.0300415039062,
|
|
"completions/mean_terminated_length": 637.8402709960938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 203.6,
|
|
"epoch": 0.28799640004499943,
|
|
"grad_norm": 0.0003517817531246692,
|
|
"learning_rate": 2.6506024096385547e-06,
|
|
"loss": -0.0098,
|
|
"num_tokens": 236175838.0,
|
|
"reward": 0.9898527383804321,
|
|
"reward_std": 0.1258085072040558,
|
|
"rewards/accuracy_reward": 0.6803819417953492,
|
|
"rewards/brier_reward": 0.7743050813674927,
|
|
"rewards/confidence_uniqueness_reward": 0.9388420104980468,
|
|
"rewards/format_reward": 0.9875868082046508,
|
|
"rewards/frontier_aurc_reward": -0.00158976421225816,
|
|
"rewards/frontier_coverage_0": -0.0026821551844477655,
|
|
"rewards/frontier_coverage_1": -0.0026821551844477655,
|
|
"rewards/frontier_coverage_10": -0.0026821551844477655,
|
|
"rewards/frontier_coverage_15": -0.0026821551844477655,
|
|
"rewards/frontier_coverage_20": -0.0026821551844477655,
|
|
"rewards/frontier_coverage_25": -0.0026821551844477655,
|
|
"rewards/frontier_coverage_5": -0.0026821551844477655,
|
|
"rewards/frontier_ece_reward": 0.015108131617307664,
|
|
"rewards/frontier_entropy_batch_reward": -0.16702641248703004,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15770399272441865,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.196875,
|
|
"signal/accuracy_reward/group_std_mean": 0.2057257741689682,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.425,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07885199636220933,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07885199636220933,
|
|
"signal/advantage_abs_mean": 0.09386955499649048,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09386955499649048,
|
|
"signal/advantage_pre_scale_std": 0.1530741721391678,
|
|
"signal/advantage_std": 0.1530741721391678,
|
|
"signal/brier_reward/centered_abs_mean": 0.15571836829185487,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8451388888888889,
|
|
"signal/brier_reward/group_std_mean": 0.19793447852134705,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015571837686002255,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015571837686002255,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03023452088236809,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.85,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04891353026032448,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030234521254897118,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030234521254897118,
|
|
"signal/format_reward/centered_abs_mean": 0.018983289785683154,
|
|
"signal/format_reward/group_bin_occupancy": 0.14340277777777777,
|
|
"signal/format_reward/group_std_mean": 0.03533447273075581,
|
|
"signal/format_reward/group_zero_std_frac": 0.8527777910232544,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009491644892841577,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009491644892841577,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016145243542268872,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7020833333333334,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0025344877038151028,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0181555009912698e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0181555009912698e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19334494173526764,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.842013888888889,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.253934046626091,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002416811836883426,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002416811836883426,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19334494173526764,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.842013888888889,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.253934046626091,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002416811836883426,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002416811836883426,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19334494173526764,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.842013888888889,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.253934046626091,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002416811836883426,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002416811836883426,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19334494173526764,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.842013888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.253934046626091,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002416811836883426,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002416811836883426,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19334494173526764,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.842013888888889,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.253934046626091,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002416811836883426,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002416811836883426,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19334494173526764,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.842013888888889,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.253934046626091,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002416811836883426,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002416811836883426,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19334494173526764,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.842013888888889,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.253934046626091,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002416811836883426,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002416811836883426,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.041843322664499284,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6902777777777777,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05341664999723435,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004184332210570574,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004184332210570574,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22915047109127046,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7739583333333333,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.29721260666847227,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02291504740715027,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02291504740715027,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19007498936195838,
|
|
"calibration/batch_distribution_entropy": 0.9497641139330535,
|
|
"calibration/batch_entropy_100bins": 0.9439166973372888,
|
|
"calibration/batch_entropy_10bins": 0.9497641139330535,
|
|
"calibration/batch_entropy_50bins": 0.9538831122680052,
|
|
"calibration/batch_uniqueness": 0.9468067326996552,
|
|
"calibration/buffer_distribution_entropy": 0.94693078276862,
|
|
"calibration/buffer_entropy_100bins": 0.9496853046137248,
|
|
"calibration/buffer_entropy_10bins": 0.94693078276862,
|
|
"calibration/buffer_entropy_50bins": 0.9582811996332786,
|
|
"calibration/confidence_entropy": 0.4982449646222564,
|
|
"calibration/coverage@0%": 0.0068008289703315895,
|
|
"calibration/coverage@1%": 0.0068008289703315895,
|
|
"calibration/coverage@10%": 0.3090450479930192,
|
|
"calibration/coverage@15%": 0.3981652644399026,
|
|
"calibration/coverage@20%": 0.5950416503253889,
|
|
"calibration/coverage@25%": 0.7859338837229998,
|
|
"calibration/coverage@30%": 0.8696369763562011,
|
|
"calibration/coverage@5%": 0.16477966841186736,
|
|
"calibration/ece": 0.13885856134762334,
|
|
"calibration/mean_confidence": 0.6056185988614311,
|
|
"calibration/prompt_uniqueness": 0.860661265331734,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00980902777777779,
|
|
"completions/max_length": 3500.2,
|
|
"completions/max_terminated_length": 3500.2,
|
|
"completions/mean_length": 624.0658935546875,
|
|
"completions/mean_terminated_length": 630.3100341796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 171.0,
|
|
"epoch": 0.2999962500468744,
|
|
"grad_norm": 0.00044128746958449483,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": -0.0094,
|
|
"num_tokens": 246482741.0,
|
|
"reward": 0.9921006441116333,
|
|
"reward_std": 0.12561330199241638,
|
|
"rewards/accuracy_reward": 0.6873263835906982,
|
|
"rewards/brier_reward": 0.7992384910583497,
|
|
"rewards/confidence_uniqueness_reward": 0.9376503348350524,
|
|
"rewards/format_reward": 0.9897569417953491,
|
|
"rewards/frontier_aurc_reward": -0.001279058470390737,
|
|
"rewards/frontier_coverage_0": 0.006591923534870148,
|
|
"rewards/frontier_coverage_1": 0.006591923534870148,
|
|
"rewards/frontier_coverage_10": 0.006591923534870148,
|
|
"rewards/frontier_coverage_15": 0.006591923534870148,
|
|
"rewards/frontier_coverage_20": 0.006591923534870148,
|
|
"rewards/frontier_coverage_25": 0.006591923534870148,
|
|
"rewards/frontier_coverage_5": 0.006591923534870148,
|
|
"rewards/frontier_ece_reward": 0.01775702629238367,
|
|
"rewards/frontier_entropy_batch_reward": -0.22466442584991456,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15937500298023224,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19687499999999997,
|
|
"signal/accuracy_reward/group_std_mean": 0.20706891417503356,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.42499999403953553,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07968750149011612,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07968750149011612,
|
|
"signal/advantage_abs_mean": 0.09454565495252609,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09454565495252609,
|
|
"signal/advantage_pre_scale_std": 0.15264492034912108,
|
|
"signal/advantage_std": 0.15264492034912108,
|
|
"signal/brier_reward/centered_abs_mean": 0.1370186984539032,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8465277777777779,
|
|
"signal/brier_reward/group_std_mean": 0.17555441856384277,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013701869174838066,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013701869174838066,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030255821347236634,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8583333333333334,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.047763481736183167,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030255821999162435,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030255821999162435,
|
|
"signal/format_reward/centered_abs_mean": 0.01732855923473835,
|
|
"signal/format_reward/group_bin_occupancy": 0.14166666666666666,
|
|
"signal/format_reward/group_std_mean": 0.03218508400022983,
|
|
"signal/format_reward/group_zero_std_frac": 0.8666666746139526,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008664279617369175,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008664279617369175,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014055859064683318,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6861111111111111,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002288359007798135,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.756982401275309e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.756982401275309e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1769299864768982,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8444444444444444,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23164838552474976,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002211624849587679,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002211624849587679,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1769299864768982,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8444444444444444,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23164838552474976,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002211624849587679,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002211624849587679,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1769299864768982,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8444444444444444,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23164838552474976,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002211624849587679,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002211624849587679,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1769299864768982,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8444444444444444,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23164838552474976,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002211624849587679,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002211624849587679,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1769299864768982,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8444444444444444,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23164838552474976,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002211624849587679,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002211624849587679,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1769299864768982,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8444444444444444,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23164838552474976,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002211624849587679,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002211624849587679,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1769299864768982,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8444444444444444,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23164838552474976,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002211624849587679,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002211624849587679,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.038429119437932965,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6680555555555555,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04894906431436539,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0038429120555520057,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0038429120555520057,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2667219638824463,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.79375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3330686569213867,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02667219564318657,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02667219564318657,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2570677552976662,
|
|
"calibration/batch_distribution_entropy": 0.9707298208948314,
|
|
"calibration/batch_entropy_100bins": 0.9563656907393726,
|
|
"calibration/batch_entropy_10bins": 0.9707298208948314,
|
|
"calibration/batch_entropy_50bins": 0.968625931346003,
|
|
"calibration/batch_uniqueness": 0.9509968355199117,
|
|
"calibration/buffer_distribution_entropy": 0.9483898179653849,
|
|
"calibration/buffer_entropy_100bins": 0.9520036670790271,
|
|
"calibration/buffer_entropy_10bins": 0.9483898179653849,
|
|
"calibration/buffer_entropy_50bins": 0.9599194290242906,
|
|
"calibration/confidence_entropy": 0.5047739615640212,
|
|
"calibration/coverage@0%": 0.020457317272494398,
|
|
"calibration/coverage@1%": 0.020457317272494398,
|
|
"calibration/coverage@10%": 0.08590234345050488,
|
|
"calibration/coverage@15%": 0.2708854907636981,
|
|
"calibration/coverage@20%": 0.43295266504226493,
|
|
"calibration/coverage@25%": 0.5562714025652029,
|
|
"calibration/coverage@30%": 0.6542660724253092,
|
|
"calibration/coverage@5%": 0.027263599995007482,
|
|
"calibration/ece": 0.15055879786288034,
|
|
"calibration/mean_confidence": 0.5460301453849368,
|
|
"calibration/prompt_uniqueness": 0.8671654591324399,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01692708333333335,
|
|
"completions/max_length": 3702.0,
|
|
"completions/max_terminated_length": 3702.0,
|
|
"completions/mean_length": 650.5223022460938,
|
|
"completions/mean_terminated_length": 661.8441528320312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.4,
|
|
"epoch": 0.3119961000487494,
|
|
"grad_norm": 0.0004091080045327544,
|
|
"learning_rate": 2.349397590361446e-06,
|
|
"loss": -0.0137,
|
|
"num_tokens": 257101558.0,
|
|
"reward": 0.9774605631828308,
|
|
"reward_std": 0.134475240111351,
|
|
"rewards/accuracy_reward": 0.6628472208976746,
|
|
"rewards/brier_reward": 0.7783628225326538,
|
|
"rewards/confidence_uniqueness_reward": 0.9335785031318664,
|
|
"rewards/format_reward": 0.9829861044883728,
|
|
"rewards/frontier_aurc_reward": -0.0014200884848833083,
|
|
"rewards/frontier_coverage_0": 0.006945094745606184,
|
|
"rewards/frontier_coverage_1": 0.006945094745606184,
|
|
"rewards/frontier_coverage_10": 0.006945094745606184,
|
|
"rewards/frontier_coverage_15": 0.006945094745606184,
|
|
"rewards/frontier_coverage_20": 0.006945094745606184,
|
|
"rewards/frontier_coverage_25": 0.006945094745606184,
|
|
"rewards/frontier_coverage_5": 0.006945094745606184,
|
|
"rewards/frontier_ece_reward": 0.013097218424081802,
|
|
"rewards/frontier_entropy_batch_reward": -0.18549902439117433,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16636284589767455,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20243055555555559,
|
|
"signal/accuracy_reward/group_std_mean": 0.21947809755802156,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08318142294883728,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08318142294883728,
|
|
"signal/advantage_abs_mean": 0.10097838938236237,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10097838938236237,
|
|
"signal/advantage_pre_scale_std": 0.16073089241981506,
|
|
"signal/advantage_std": 0.16073089241981506,
|
|
"signal/brier_reward/centered_abs_mean": 0.1482792615890503,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8479166666666668,
|
|
"signal/brier_reward/group_std_mean": 0.1903451293706894,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014827927015721798,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014827927015721798,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03590902425348759,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8461805555555555,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05420064702630043,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035909025464206934,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035909025464206934,
|
|
"signal/format_reward/centered_abs_mean": 0.02457682266831398,
|
|
"signal/format_reward/group_bin_occupancy": 0.14409722222222224,
|
|
"signal/format_reward/group_std_mean": 0.04063734821975231,
|
|
"signal/format_reward/group_zero_std_frac": 0.8472222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01228841133415699,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01228841133415699,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014170024311169981,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7017361111111111,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002285012090578675,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7712531553115694e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7712531553115694e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19146940410137175,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8520833333333334,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.25203768312931063,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023933676537126304,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023933676537126304,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19146940410137175,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8520833333333334,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.25203768312931063,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023933676537126304,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023933676537126304,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19146940410137175,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8520833333333334,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.25203768312931063,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023933676537126304,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023933676537126304,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19146940410137175,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8520833333333334,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.25203768312931063,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023933676537126304,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023933676537126304,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19146940410137175,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8520833333333334,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.25203768312931063,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023933676537126304,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023933676537126304,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19146940410137175,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8520833333333334,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.25203768312931063,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023933676537126304,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023933676537126304,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19146940410137175,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8520833333333334,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.25203768312931063,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023933676537126304,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023933676537126304,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03642968088388443,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6868055555555557,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04717910811305046,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036429683677852154,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036429683677852154,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24993859529495238,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7871527777777778,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.31895039677619935,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024993859976530076,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024993859976530076,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23095194989951517,
|
|
"calibration/batch_distribution_entropy": 0.9421492482556129,
|
|
"calibration/batch_entropy_100bins": 0.9440415676294579,
|
|
"calibration/batch_entropy_10bins": 0.9421492482556129,
|
|
"calibration/batch_entropy_50bins": 0.9520449592729043,
|
|
"calibration/batch_uniqueness": 0.9453759538189441,
|
|
"calibration/buffer_distribution_entropy": 0.950428047537104,
|
|
"calibration/buffer_entropy_100bins": 0.9543930678343209,
|
|
"calibration/buffer_entropy_10bins": 0.950428047537104,
|
|
"calibration/buffer_entropy_50bins": 0.9617481873791439,
|
|
"calibration/confidence_entropy": 0.47065056430076924,
|
|
"calibration/coverage@0%": 0.04189883140091326,
|
|
"calibration/coverage@1%": 0.04189883140091326,
|
|
"calibration/coverage@10%": 0.3034366940325464,
|
|
"calibration/coverage@15%": 0.3935276632602748,
|
|
"calibration/coverage@20%": 0.48246784032256607,
|
|
"calibration/coverage@25%": 0.5515483076447136,
|
|
"calibration/coverage@30%": 0.6815676706121045,
|
|
"calibration/coverage@5%": 0.17324618582756293,
|
|
"calibration/ece": 0.14784717956359078,
|
|
"calibration/mean_confidence": 0.6212378872019609,
|
|
"calibration/prompt_uniqueness": 0.8551414614959321,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011805555555555536,
|
|
"completions/max_length": 3302.8,
|
|
"completions/max_terminated_length": 3302.8,
|
|
"completions/mean_length": 626.293505859375,
|
|
"completions/mean_terminated_length": 633.8015014648438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 159.4,
|
|
"epoch": 0.32399595005062437,
|
|
"grad_norm": 0.0004363918851595372,
|
|
"learning_rate": 2.1987951807228917e-06,
|
|
"loss": -0.0106,
|
|
"num_tokens": 267409483.0,
|
|
"reward": 0.9906501173973083,
|
|
"reward_std": 0.12640073150396347,
|
|
"rewards/accuracy_reward": 0.6786458253860473,
|
|
"rewards/brier_reward": 0.7910825252532959,
|
|
"rewards/confidence_uniqueness_reward": 0.9383931279182434,
|
|
"rewards/format_reward": 0.9881076216697693,
|
|
"rewards/frontier_aurc_reward": -0.0013120988383889197,
|
|
"rewards/frontier_coverage_0": 0.014272965677082538,
|
|
"rewards/frontier_coverage_1": 0.014272965677082538,
|
|
"rewards/frontier_coverage_10": 0.014272965677082538,
|
|
"rewards/frontier_coverage_15": 0.014272965677082538,
|
|
"rewards/frontier_coverage_20": 0.014272965677082538,
|
|
"rewards/frontier_coverage_25": 0.014272965677082538,
|
|
"rewards/frontier_coverage_5": 0.014272965677082538,
|
|
"rewards/frontier_ece_reward": 0.016592884063720705,
|
|
"rewards/frontier_entropy_batch_reward": -0.18565942943096161,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15636935830116272,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19722222222222224,
|
|
"signal/accuracy_reward/group_std_mean": 0.20537342131137848,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.42222222685813904,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07818467915058136,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07818467915058136,
|
|
"signal/advantage_abs_mean": 0.0925526574254036,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0925526574254036,
|
|
"signal/advantage_pre_scale_std": 0.15250625014305114,
|
|
"signal/advantage_std": 0.15250625014305114,
|
|
"signal/brier_reward/centered_abs_mean": 0.14853745102882385,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8354166666666668,
|
|
"signal/brier_reward/group_std_mean": 0.1917984515428543,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014853744953870773,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014853744953870773,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03174131475389004,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8378472222222222,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05302174612879753,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031741314101964234,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031741314101964234,
|
|
"signal/format_reward/centered_abs_mean": 0.02053493894636631,
|
|
"signal/format_reward/group_bin_occupancy": 0.14583333333333334,
|
|
"signal/format_reward/group_std_mean": 0.039642113447189334,
|
|
"signal/format_reward/group_zero_std_frac": 0.8333333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010267469473183155,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010267469473183155,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014899963280186057,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6937500000000001,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002411051280796528,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.862495373643469e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.862495373643469e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19647094905376433,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.25708119869232177,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024558869190514088,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024558869190514088,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19647094905376433,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.25708119869232177,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024558869190514088,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024558869190514088,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19647094905376433,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.25708119869232177,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024558869190514088,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024558869190514088,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19647094905376433,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.25708119869232177,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024558869190514088,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024558869190514088,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19647094905376433,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.25708119869232177,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024558869190514088,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024558869190514088,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19647094905376433,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.25708119869232177,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024558869190514088,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024558869190514088,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19647094905376433,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.25708119869232177,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024558869190514088,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024558869190514088,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03865344226360321,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6791666666666667,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04832939356565476,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0038653444964438675,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0038653444964438675,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23874907791614533,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.773611111111111,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3072973072528839,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023874907195568083,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023874907195568083,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1294049869245013,
|
|
"calibration/batch_distribution_entropy": 0.9557180391089094,
|
|
"calibration/batch_entropy_100bins": 0.9497484635160689,
|
|
"calibration/batch_entropy_10bins": 0.9557180391089094,
|
|
"calibration/batch_entropy_50bins": 0.9591875124129359,
|
|
"calibration/batch_uniqueness": 0.9468833336742613,
|
|
"calibration/buffer_distribution_entropy": 0.9549444448123883,
|
|
"calibration/buffer_entropy_100bins": 0.9602670469433393,
|
|
"calibration/buffer_entropy_10bins": 0.9549444448123883,
|
|
"calibration/buffer_entropy_50bins": 0.9660572803453091,
|
|
"calibration/confidence_entropy": 0.4833540273370396,
|
|
"calibration/coverage@0%": 0.05841797685887148,
|
|
"calibration/coverage@1%": 0.05841797685887148,
|
|
"calibration/coverage@10%": 0.48837748487367827,
|
|
"calibration/coverage@15%": 0.6872088995406774,
|
|
"calibration/coverage@20%": 0.8049094782025777,
|
|
"calibration/coverage@25%": 0.9049543537246117,
|
|
"calibration/coverage@30%": 0.9723005208262135,
|
|
"calibration/coverage@5%": 0.16348598057755775,
|
|
"calibration/ece": 0.12621167596611338,
|
|
"calibration/mean_confidence": 0.5960379852244188,
|
|
"calibration/prompt_uniqueness": 0.8497180192039384,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012586805555555558,
|
|
"completions/max_length": 3468.0,
|
|
"completions/max_terminated_length": 3468.0,
|
|
"completions/mean_length": 624.9230834960938,
|
|
"completions/mean_terminated_length": 632.8444091796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 187.2,
|
|
"epoch": 0.33599580005249935,
|
|
"grad_norm": 0.00038522930117323995,
|
|
"learning_rate": 2.0481927710843377e-06,
|
|
"loss": -0.0106,
|
|
"num_tokens": 277712821.0,
|
|
"reward": 0.9888324856758117,
|
|
"reward_std": 0.12152263075113297,
|
|
"rewards/accuracy_reward": 0.6763020873069763,
|
|
"rewards/brier_reward": 0.7869715809822082,
|
|
"rewards/confidence_uniqueness_reward": 0.9377409458160401,
|
|
"rewards/format_reward": 0.9873264074325562,
|
|
"rewards/frontier_aurc_reward": -0.0012948142597451807,
|
|
"rewards/frontier_coverage_0": 0.013374109752476215,
|
|
"rewards/frontier_coverage_1": 0.013374109752476215,
|
|
"rewards/frontier_coverage_10": 0.013374109752476215,
|
|
"rewards/frontier_coverage_15": 0.013374109752476215,
|
|
"rewards/frontier_coverage_20": 0.013374109752476215,
|
|
"rewards/frontier_coverage_25": 0.013374109752476215,
|
|
"rewards/frontier_coverage_5": 0.013374109752476215,
|
|
"rewards/frontier_ece_reward": 0.013652277737855911,
|
|
"rewards/frontier_entropy_batch_reward": -0.17972289621829987,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1446994349360466,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1954861111111111,
|
|
"signal/accuracy_reward/group_std_mean": 0.19354265332221984,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43611112236976624,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0723497174680233,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0723497174680233,
|
|
"signal/advantage_abs_mean": 0.08829654604196549,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08829654604196549,
|
|
"signal/advantage_pre_scale_std": 0.1477721154689789,
|
|
"signal/advantage_std": 0.1477721154689789,
|
|
"signal/brier_reward/centered_abs_mean": 0.1490771532058716,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8548611111111111,
|
|
"signal/brier_reward/group_std_mean": 0.1905330777168274,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014907715283334256,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014907715283334256,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031153790652751923,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.829861111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05239210352301597,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031153791584074496,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031153791584074496,
|
|
"signal/format_reward/centered_abs_mean": 0.019932725466787815,
|
|
"signal/format_reward/group_bin_occupancy": 0.14583333333333331,
|
|
"signal/format_reward/group_std_mean": 0.03907729685306549,
|
|
"signal/format_reward/group_zero_std_frac": 0.8333333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009966362733393908,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009966362733393908,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014448148664087056,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6923611111111112,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002320256642997265,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8060186630464158e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8060186630464158e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19429327845573424,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8440972222222222,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2543476581573486,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00242866612970829,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00242866612970829,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19429327845573424,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8440972222222222,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2543476581573486,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00242866612970829,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00242866612970829,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19429327845573424,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8440972222222222,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2543476581573486,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00242866612970829,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00242866612970829,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19429327845573424,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8440972222222222,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2543476581573486,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00242866612970829,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00242866612970829,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19429327845573424,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8440972222222222,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2543476581573486,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00242866612970829,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00242866612970829,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19429327845573424,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8440972222222222,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2543476581573486,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00242866612970829,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00242866612970829,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19429327845573424,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8440972222222222,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2543476581573486,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00242866612970829,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00242866612970829,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.035150817781686786,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6836805555555556,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04459300860762596,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035150818061083556,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035150818061083556,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23712966442108155,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.779513888888889,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3046766459941864,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023712967336177827,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023712967336177827,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18683794065072812,
|
|
"calibration/batch_distribution_entropy": 0.9806387486177448,
|
|
"calibration/batch_entropy_100bins": 0.9596139831547141,
|
|
"calibration/batch_entropy_10bins": 0.9806387486177448,
|
|
"calibration/batch_entropy_50bins": 0.9722668151489826,
|
|
"calibration/batch_uniqueness": 0.9519202784227648,
|
|
"calibration/buffer_distribution_entropy": 0.965098347204146,
|
|
"calibration/buffer_entropy_100bins": 0.9711153442674696,
|
|
"calibration/buffer_entropy_10bins": 0.965098347204146,
|
|
"calibration/buffer_entropy_50bins": 0.9745830308057396,
|
|
"calibration/confidence_entropy": 0.49438751115496604,
|
|
"calibration/coverage@0%": 0.023666604008716417,
|
|
"calibration/coverage@1%": 0.023666604008716417,
|
|
"calibration/coverage@10%": 0.34799730209956625,
|
|
"calibration/coverage@15%": 0.48327203315425693,
|
|
"calibration/coverage@20%": 0.6064102691678379,
|
|
"calibration/coverage@25%": 0.7282333990057179,
|
|
"calibration/coverage@30%": 0.8306165917741957,
|
|
"calibration/coverage@5%": 0.05673747015044871,
|
|
"calibration/ece": 0.14025309224842797,
|
|
"calibration/mean_confidence": 0.5135840354133834,
|
|
"calibration/prompt_uniqueness": 0.8570269969432882,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0109375,
|
|
"completions/max_length": 3256.0,
|
|
"completions/max_terminated_length": 3256.0,
|
|
"completions/mean_length": 611.480029296875,
|
|
"completions/mean_terminated_length": 618.2994384765625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 192.4,
|
|
"epoch": 0.34799565005437433,
|
|
"grad_norm": 0.0003621011273935437,
|
|
"learning_rate": 1.8975903614457832e-06,
|
|
"loss": -0.0102,
|
|
"num_tokens": 287821679.0,
|
|
"reward": 1.004742157459259,
|
|
"reward_std": 0.11290163099765778,
|
|
"rewards/accuracy_reward": 0.7052951335906983,
|
|
"rewards/brier_reward": 0.7879093527793884,
|
|
"rewards/confidence_uniqueness_reward": 0.9406887650489807,
|
|
"rewards/format_reward": 0.9889756917953492,
|
|
"rewards/frontier_aurc_reward": -0.0011246049660257995,
|
|
"rewards/frontier_coverage_0": -0.0045726167038083075,
|
|
"rewards/frontier_coverage_1": -0.0045726167038083075,
|
|
"rewards/frontier_coverage_10": -0.0045726167038083075,
|
|
"rewards/frontier_coverage_15": -0.0045726167038083075,
|
|
"rewards/frontier_coverage_20": -0.0045726167038083075,
|
|
"rewards/frontier_coverage_25": -0.005139388330280781,
|
|
"rewards/frontier_coverage_5": -0.0045726167038083075,
|
|
"rewards/frontier_ece_reward": 0.008927960135042667,
|
|
"rewards/frontier_entropy_batch_reward": -0.1572466716170311,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1355523034930229,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19305555555555556,
|
|
"signal/accuracy_reward/group_std_mean": 0.1837514191865921,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.45555557012557985,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06777615174651146,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06777615174651146,
|
|
"signal/advantage_abs_mean": 0.08125371336936951,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08125371336936951,
|
|
"signal/advantage_pre_scale_std": 0.140377277135849,
|
|
"signal/advantage_std": 0.140377277135849,
|
|
"signal/brier_reward/centered_abs_mean": 0.14059088230133057,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8302083333333334,
|
|
"signal/brier_reward/group_std_mean": 0.18201070427894592,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014059088379144668,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014059088379144668,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029994430020451546,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8548611111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04794644489884377,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029994430486112835,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029994430486112835,
|
|
"signal/format_reward/centered_abs_mean": 0.01914605051279068,
|
|
"signal/format_reward/group_bin_occupancy": 0.14201388888888888,
|
|
"signal/format_reward/group_std_mean": 0.03469080775976181,
|
|
"signal/format_reward/group_zero_std_frac": 0.8638888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00957302525639534,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00957302525639534,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013646916137076913,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6895833333333333,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023204814875498413,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7058645971701482e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7058645971701482e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19198527336120605,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.821875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2514296382665634,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002399816084653139,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002399816084653139,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19198527336120605,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.821875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2514296382665634,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002399816084653139,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002399816084653139,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19198527336120605,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.821875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2514296382665634,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002399816084653139,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002399816084653139,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19198527336120605,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.821875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2514296382665634,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002399816084653139,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002399816084653139,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19198527336120605,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.821875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2514296382665634,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002399816084653139,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002399816084653139,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19039924442768097,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8211805555555557,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24940116107463836,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002379990741610527,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002379990741610527,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19198527336120605,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.821875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2514296382665634,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002399816084653139,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002399816084653139,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.030152727663517,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6826388888888889,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03811613321304321,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030152729246765374,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030152729246765374,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2264914721250534,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7663194444444444,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2937849909067154,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022649147361516953,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022649147361516953,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.168028061806691,
|
|
"calibration/batch_distribution_entropy": 0.9503921341637899,
|
|
"calibration/batch_entropy_100bins": 0.9454745464314603,
|
|
"calibration/batch_entropy_10bins": 0.9503921341637899,
|
|
"calibration/batch_entropy_50bins": 0.9544974811591391,
|
|
"calibration/batch_uniqueness": 0.9470963432074356,
|
|
"calibration/buffer_distribution_entropy": 0.973697032680677,
|
|
"calibration/buffer_entropy_100bins": 0.9799372478415842,
|
|
"calibration/buffer_entropy_10bins": 0.973697032680677,
|
|
"calibration/buffer_entropy_50bins": 0.9815119596059615,
|
|
"calibration/confidence_entropy": 0.46900084214722726,
|
|
"calibration/coverage@0%": 0.0677531953645879,
|
|
"calibration/coverage@1%": 0.08203890965030221,
|
|
"calibration/coverage@10%": 0.40898672177949313,
|
|
"calibration/coverage@15%": 0.5754278277850677,
|
|
"calibration/coverage@20%": 0.6407174776379441,
|
|
"calibration/coverage@25%": 0.6956564603220498,
|
|
"calibration/coverage@30%": 0.7747426797957127,
|
|
"calibration/coverage@5%": 0.3361056663766333,
|
|
"calibration/ece": 0.1727506317152436,
|
|
"calibration/mean_confidence": 0.5696691972139503,
|
|
"calibration/prompt_uniqueness": 0.8616723242905033,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009548611111111115,
|
|
"completions/max_length": 3496.8,
|
|
"completions/max_terminated_length": 3496.8,
|
|
"completions/mean_length": 664.9171875,
|
|
"completions/mean_terminated_length": 671.283251953125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 185.4,
|
|
"epoch": 0.3599955000562493,
|
|
"grad_norm": 0.00037436833372339606,
|
|
"learning_rate": 1.7469879518072292e-06,
|
|
"loss": -0.0077,
|
|
"num_tokens": 298591861.0,
|
|
"reward": 0.9980527639389039,
|
|
"reward_std": 0.12233888059854507,
|
|
"rewards/accuracy_reward": 0.6992187619209289,
|
|
"rewards/brier_reward": 0.802030611038208,
|
|
"rewards/confidence_uniqueness_reward": 0.9381472826004028,
|
|
"rewards/format_reward": 0.9902777791023254,
|
|
"rewards/frontier_aurc_reward": -0.001526768645271659,
|
|
"rewards/frontier_coverage_0": 0.011462044250220061,
|
|
"rewards/frontier_coverage_1": 0.011462044250220061,
|
|
"rewards/frontier_coverage_10": 0.011462044250220061,
|
|
"rewards/frontier_coverage_15": 0.011462044250220061,
|
|
"rewards/frontier_coverage_20": 0.01288942052051425,
|
|
"rewards/frontier_coverage_25": 0.03849505893886089,
|
|
"rewards/frontier_coverage_5": 0.011462044250220061,
|
|
"rewards/frontier_ece_reward": 0.007876492012292147,
|
|
"rewards/frontier_entropy_batch_reward": -0.22840518951416017,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14632704257965087,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.196875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1965962290763855,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.42500001192092896,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07316352128982544,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07316352128982544,
|
|
"signal/advantage_abs_mean": 0.08877704441547393,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08877704441547393,
|
|
"signal/advantage_pre_scale_std": 0.14888640940189363,
|
|
"signal/advantage_std": 0.14888640940189363,
|
|
"signal/brier_reward/centered_abs_mean": 0.14254556894302367,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8364583333333334,
|
|
"signal/brier_reward/group_std_mean": 0.18436427116394044,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014254557155072688,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014254557155072688,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029425183311104774,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8427083333333332,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05025056228041649,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002942518377676606,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002942518377676606,
|
|
"signal/format_reward/centered_abs_mean": 0.01655815988779068,
|
|
"signal/format_reward/group_bin_occupancy": 0.14479166666666668,
|
|
"signal/format_reward/group_std_mean": 0.034861961379647255,
|
|
"signal/format_reward/group_zero_std_frac": 0.8416666746139526,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00827907994389534,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00827907994389534,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00209680434782058,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6986111111111111,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003620346961542964,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6210054784314707e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6210054784314707e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18070359230041505,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8350694444444444,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23828611075878142,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002258794941008091,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002258794941008091,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18070359230041505,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8350694444444444,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23828611075878142,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002258794941008091,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002258794941008091,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18070359230041505,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8350694444444444,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23828611075878142,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002258794941008091,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002258794941008091,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18070359230041505,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8350694444444444,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23828611075878142,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002258794941008091,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002258794941008091,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17324572205543518,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8274305555555556,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22867600619792938,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021655716467648745,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021655716467648745,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08664727360010147,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8840277777777779,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1133154422044754,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010830909595824778,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010830909595824778,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18070359230041505,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8350694444444444,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23828611075878142,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002258794941008091,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002258794941008091,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.025348235666751862,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7229166666666668,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.032069115340709685,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025348236784338953,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025348236784338953,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2727514892816544,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7552083333333334,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34558807611465453,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027275149524211884,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027275149524211884,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.3599955000562493,
|
|
"eval_calibration/aurc": 0.14901822974262594,
|
|
"eval_calibration/batch_distribution_entropy": 0.9304030834834719,
|
|
"eval_calibration/batch_entropy_100bins": 0.6979890618590382,
|
|
"eval_calibration/batch_entropy_10bins": 0.9304030834834719,
|
|
"eval_calibration/batch_entropy_50bins": 0.7766607817845022,
|
|
"eval_calibration/batch_uniqueness": 0.8930084974900759,
|
|
"eval_calibration/buffer_distribution_entropy": 0.97765497693518,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9841955955582375,
|
|
"eval_calibration/buffer_entropy_10bins": 0.97765497693518,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9847609370459999,
|
|
"eval_calibration/confidence_entropy": 0.4936314506498249,
|
|
"eval_calibration/coverage@0%": 0.26278001792114697,
|
|
"eval_calibration/coverage@1%": 0.26278001792114697,
|
|
"eval_calibration/coverage@10%": 0.45868055555555554,
|
|
"eval_calibration/coverage@15%": 0.6025649641577061,
|
|
"eval_calibration/coverage@20%": 0.7132840501792114,
|
|
"eval_calibration/coverage@25%": 0.9083893369175627,
|
|
"eval_calibration/coverage@30%": 0.9567988351254479,
|
|
"eval_calibration/coverage@5%": 0.3044466845878136,
|
|
"eval_calibration/ece": 0.2226041519685852,
|
|
"eval_calibration/mean_confidence": 0.5655246136044548,
|
|
"eval_calibration/prompt_uniqueness": 0.8930084974900759,
|
|
"eval_completions/clipped_ratio": 0.010416666666666666,
|
|
"eval_completions/max_length": 2470.5,
|
|
"eval_completions/max_terminated_length": 2470.5,
|
|
"eval_completions/mean_length": 634.5142517089844,
|
|
"eval_completions/mean_terminated_length": 641.2839864095052,
|
|
"eval_completions/min_length": 52.5,
|
|
"eval_completions/min_terminated_length": 234.16666666666666,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 298591861.0,
|
|
"eval_reward": 0.9375461836655935,
|
|
"eval_reward_std": 0.23832263300816217,
|
|
"eval_rewards/accuracy_reward": 0.6762152711550394,
|
|
"eval_rewards/brier_reward": 0.782086193561554,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8867801527182261,
|
|
"eval_rewards/format_reward": 0.987847218910853,
|
|
"eval_rewards/frontier_aurc_reward": -0.0018945778623068084,
|
|
"eval_rewards/frontier_coverage_0": 0.008154223828266064,
|
|
"eval_rewards/frontier_coverage_1": 0.008154223828266064,
|
|
"eval_rewards/frontier_coverage_10": 0.008154223828266064,
|
|
"eval_rewards/frontier_coverage_15": 0.008154223828266064,
|
|
"eval_rewards/frontier_coverage_20": 0.013963257893919945,
|
|
"eval_rewards/frontier_coverage_25": 0.05141168336073557,
|
|
"eval_rewards/frontier_coverage_5": 0.008154223828266064,
|
|
"eval_rewards/frontier_ece_reward": 0.004938475166757901,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.6316870252291361,
|
|
"eval_runtime": 214.6923,
|
|
"eval_samples_per_second": 4.658,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4254014740387599,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4673873384793599,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21270073701937994,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21270073701937994,
|
|
"eval_signal/advantage_abs_mean": 0.20312496026357016,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20312496026357016,
|
|
"eval_signal/advantage_pre_scale_std": 0.23702458292245865,
|
|
"eval_signal/advantage_std": 0.23702458292245865,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.202142134308815,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.8888888888888888,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2583857501546542,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020214214610556763,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.020214214610556763,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05369566256801287,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3923611111111111,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08944027374188106,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005369566303367416,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005369566303367416,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.023328992693374555,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.16666666666666666,
|
|
"eval_signal/format_reward/group_std_mean": 0.06276767483601968,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.6666666865348816,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011664496346687278,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.011664496346687278,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0031803955983680985,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.5972222222222222,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007048736986083289,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9754943524409704e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9754943524409704e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.28077225387096405,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9374999999999999,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.39534174899260205,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0035096531501039863,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035096531501039863,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.28077225387096405,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9374999999999999,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.39534174899260205,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035096531501039863,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035096531501039863,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.28077225387096405,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9374999999999999,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.39534174899260205,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035096531501039863,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035096531501039863,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.28077225387096405,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9374999999999999,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.39534174899260205,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035096531501039863,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035096531501039863,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.21713952968517938,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8993055555555557,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.3139382104078929,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027142442607631287,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027142442607631287,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.08837362627188365,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.954861111111111,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.1131880668302377,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011046703827256958,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011046703827256958,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.28077225387096405,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9374999999999999,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.39534174899260205,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035096531501039863,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035096531501039863,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.03151553000013033,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.042221867789824806,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031515529456858835,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031515529456858835,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3152608970801036,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2916666666666667,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.3325229287147522,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03152609150856733,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03152609150856733,
|
|
"eval_steps_per_second": 0.028,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.12418416862657974,
|
|
"calibration/batch_distribution_entropy": 0.9738042618432547,
|
|
"calibration/batch_entropy_100bins": 0.959685408987745,
|
|
"calibration/batch_entropy_10bins": 0.9738042618432547,
|
|
"calibration/batch_entropy_50bins": 0.9715188105067746,
|
|
"calibration/batch_uniqueness": 0.9511053370700557,
|
|
"calibration/buffer_distribution_entropy": 0.9796740268447254,
|
|
"calibration/buffer_entropy_100bins": 0.9863859099776396,
|
|
"calibration/buffer_entropy_10bins": 0.9796740268447254,
|
|
"calibration/buffer_entropy_50bins": 0.986449386820567,
|
|
"calibration/confidence_entropy": 0.4959676103167527,
|
|
"calibration/coverage@0%": 0.05926240634931017,
|
|
"calibration/coverage@1%": 0.05926240634931017,
|
|
"calibration/coverage@10%": 0.6127707884216798,
|
|
"calibration/coverage@15%": 0.7128341997211541,
|
|
"calibration/coverage@20%": 0.8080721228110945,
|
|
"calibration/coverage@25%": 0.8990786433912484,
|
|
"calibration/coverage@30%": 0.9890339425587467,
|
|
"calibration/coverage@5%": 0.3137256346668646,
|
|
"calibration/ece": 0.21717115584354518,
|
|
"calibration/mean_confidence": 0.5605943705821324,
|
|
"calibration/prompt_uniqueness": 0.8582839084278222,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009027777777777768,
|
|
"completions/max_length": 3711.8,
|
|
"completions/max_terminated_length": 3711.8,
|
|
"completions/mean_length": 620.5934936523438,
|
|
"completions/mean_terminated_length": 626.3040405273438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 187.2,
|
|
"epoch": 0.3719953500581243,
|
|
"grad_norm": 0.00040308100869879127,
|
|
"learning_rate": 1.5963855421686747e-06,
|
|
"loss": -0.0072,
|
|
"num_tokens": 308848810.0,
|
|
"reward": 1.0218815565109254,
|
|
"reward_std": 0.12359268218278885,
|
|
"rewards/accuracy_reward": 0.7443576335906983,
|
|
"rewards/brier_reward": 0.7995624542236328,
|
|
"rewards/confidence_uniqueness_reward": 0.9407005667686462,
|
|
"rewards/format_reward": 0.9903645873069763,
|
|
"rewards/frontier_aurc_reward": -0.0011058273608796298,
|
|
"rewards/frontier_coverage_0": -0.020049982517957688,
|
|
"rewards/frontier_coverage_1": -0.020049982517957688,
|
|
"rewards/frontier_coverage_10": -0.020049982517957688,
|
|
"rewards/frontier_coverage_15": -0.020049982517957688,
|
|
"rewards/frontier_coverage_20": 0.007909675501286984,
|
|
"rewards/frontier_coverage_25": 0.07766608744859696,
|
|
"rewards/frontier_coverage_5": -0.020049982517957688,
|
|
"rewards/frontier_ece_reward": 0.0013356797680899035,
|
|
"rewards/frontier_entropy_batch_reward": -0.19442155659198762,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1566785991191864,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19791666666666666,
|
|
"signal/accuracy_reward/group_std_mean": 0.20641724467277528,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0783392995595932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0783392995595932,
|
|
"signal/advantage_abs_mean": 0.09009798169136048,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09009798169136048,
|
|
"signal/advantage_pre_scale_std": 0.15116022229194642,
|
|
"signal/advantage_std": 0.15116022229194642,
|
|
"signal/brier_reward/centered_abs_mean": 0.14188904762268068,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8482638888888889,
|
|
"signal/brier_reward/group_std_mean": 0.18152420222759247,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014188905246555805,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014188905246555805,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028706640005111694,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8447916666666666,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04965458139777183,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002870664047077298,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002870664047077298,
|
|
"signal/format_reward/centered_abs_mean": 0.01727973110973835,
|
|
"signal/format_reward/group_bin_occupancy": 0.14513888888888887,
|
|
"signal/format_reward/group_std_mean": 0.03610437363386154,
|
|
"signal/format_reward/group_zero_std_frac": 0.8388888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008639865554869175,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008639865554869175,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016173893585801125,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7194444444444444,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002777449763379991,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.021736818278441e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.021736818278441e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19061762392520903,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8347222222222221,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2524797976016998,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002382720448076725,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002382720448076725,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19061762392520903,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8347222222222221,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2524797976016998,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002382720448076725,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002382720448076725,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19061762392520903,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8347222222222221,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2524797976016998,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002382720448076725,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002382720448076725,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19061762392520903,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8347222222222221,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2524797976016998,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002382720448076725,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002382720448076725,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10665316879749298,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8378472222222222,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1441801980137825,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013331646099686623,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013331646099686623,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07222038358449936,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9309027777777776,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0917926698923111,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009027547785080969,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009027547785080969,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19061762392520903,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8347222222222221,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2524797976016998,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002382720448076725,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002382720448076725,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.023065327480435372,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7246527777777778,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02965252809226513,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023065326735377313,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023065326735377313,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24839998483657838,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7788194444444445,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.31682642698287966,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02484000064432621,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02484000064432621,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.13573148767459628,
|
|
"calibration/batch_distribution_entropy": 0.9496131093908463,
|
|
"calibration/batch_entropy_100bins": 0.9478886953307164,
|
|
"calibration/batch_entropy_10bins": 0.9496131093908463,
|
|
"calibration/batch_entropy_50bins": 0.9558848076576348,
|
|
"calibration/batch_uniqueness": 0.9477837762673808,
|
|
"calibration/buffer_distribution_entropy": 0.9833397575756475,
|
|
"calibration/buffer_entropy_100bins": 0.9902063640434747,
|
|
"calibration/buffer_entropy_10bins": 0.9833397575756475,
|
|
"calibration/buffer_entropy_50bins": 0.9895285524682471,
|
|
"calibration/confidence_entropy": 0.5064340839226602,
|
|
"calibration/coverage@0%": 0.06199548520452567,
|
|
"calibration/coverage@1%": 0.100016318537859,
|
|
"calibration/coverage@10%": 0.4863422228857656,
|
|
"calibration/coverage@15%": 0.7070152931318631,
|
|
"calibration/coverage@20%": 0.8079925379000178,
|
|
"calibration/coverage@25%": 0.8801128926701571,
|
|
"calibration/coverage@30%": 0.9193717277486911,
|
|
"calibration/coverage@5%": 0.3584050596726108,
|
|
"calibration/ece": 0.1611499914834087,
|
|
"calibration/mean_confidence": 0.6053540170302975,
|
|
"calibration/prompt_uniqueness": 0.8627381388571653,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013194444444444443,
|
|
"completions/max_length": 3444.2,
|
|
"completions/max_terminated_length": 3444.2,
|
|
"completions/mean_length": 622.8822998046875,
|
|
"completions/mean_terminated_length": 631.2034301757812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 193.8,
|
|
"epoch": 0.38399520005999926,
|
|
"grad_norm": 0.0003652535378932953,
|
|
"learning_rate": 1.4457831325301204e-06,
|
|
"loss": -0.0111,
|
|
"num_tokens": 319111710.0,
|
|
"reward": 0.988642418384552,
|
|
"reward_std": 0.1208130583167076,
|
|
"rewards/accuracy_reward": 0.6771701335906982,
|
|
"rewards/brier_reward": 0.7907926917076111,
|
|
"rewards/confidence_uniqueness_reward": 0.9372875452041626,
|
|
"rewards/format_reward": 0.9866319298744202,
|
|
"rewards/frontier_aurc_reward": -0.0019396688556298613,
|
|
"rewards/frontier_coverage_0": 0.009741135686635972,
|
|
"rewards/frontier_coverage_1": 0.009741135686635972,
|
|
"rewards/frontier_coverage_10": 0.009741135686635972,
|
|
"rewards/frontier_coverage_15": 0.01013163048774004,
|
|
"rewards/frontier_coverage_20": 0.027610554732382296,
|
|
"rewards/frontier_coverage_25": 0.08821047395467758,
|
|
"rewards/frontier_coverage_5": 0.009741135686635972,
|
|
"rewards/frontier_ece_reward": 0.0028510759511846118,
|
|
"rewards/frontier_entropy_batch_reward": -0.18388957977294923,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14061957597732544,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19201388888888887,
|
|
"signal/accuracy_reward/group_std_mean": 0.1869141399860382,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07030978798866272,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07030978798866272,
|
|
"signal/advantage_abs_mean": 0.08863120973110199,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08863120973110199,
|
|
"signal/advantage_pre_scale_std": 0.15098720490932466,
|
|
"signal/advantage_std": 0.15098720490932466,
|
|
"signal/brier_reward/centered_abs_mean": 0.14210671186447144,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8583333333333334,
|
|
"signal/brier_reward/group_std_mean": 0.18185594975948333,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014210670255124569,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014210670255124569,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.033150676265358926,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.846875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.052501931041479113,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003315067803487182,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003315067803487182,
|
|
"signal/format_reward/centered_abs_mean": 0.02215711772441864,
|
|
"signal/format_reward/group_bin_occupancy": 0.14409722222222224,
|
|
"signal/format_reward/group_std_mean": 0.03936988487839699,
|
|
"signal/format_reward/group_zero_std_frac": 0.8472222208976745,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01107855886220932,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01107855886220932,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002279521874152124,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6902777777777778,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038658153265714646,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8494023717939852e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8494023717939852e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17956892549991607,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23461733758449554,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002244611643254757,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002244611643254757,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17956892549991607,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23461733758449554,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002244611643254757,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002244611643254757,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17956892549991607,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23461733758449554,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002244611643254757,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002244611643254757,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17778740525245668,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8458333333333332,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2324183076620102,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00222234264947474,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00222234264947474,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06870782449841499,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9017361111111111,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0908221110701561,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008588478667661548,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008588478667661548,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0829654261469841,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9145833333333334,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10622318387031555,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010370678268373013,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010370678268373013,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17956892549991607,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23461733758449554,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002244611643254757,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002244611643254757,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02158619686961174,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7381944444444445,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.027624867483973505,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021586197894066573,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021586197894066573,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23778702020645143,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7749999999999999,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3028127193450928,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023778701573610304,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023778701573610304,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.15272836679211094,
|
|
"calibration/batch_distribution_entropy": 0.9627599417273558,
|
|
"calibration/batch_entropy_100bins": 0.9522571385844725,
|
|
"calibration/batch_entropy_10bins": 0.9627599417273558,
|
|
"calibration/batch_entropy_50bins": 0.9613007206497416,
|
|
"calibration/batch_uniqueness": 0.9484824117838434,
|
|
"calibration/buffer_distribution_entropy": 0.9855117090133282,
|
|
"calibration/buffer_entropy_100bins": 0.9919619031900998,
|
|
"calibration/buffer_entropy_10bins": 0.9855117090133282,
|
|
"calibration/buffer_entropy_50bins": 0.9911634713514934,
|
|
"calibration/confidence_entropy": 0.4857673257774547,
|
|
"calibration/coverage@0%": 0.049065118846368376,
|
|
"calibration/coverage@1%": 0.07951131307209017,
|
|
"calibration/coverage@10%": 0.5366995054961048,
|
|
"calibration/coverage@15%": 0.6177436479560819,
|
|
"calibration/coverage@20%": 0.6741639757050029,
|
|
"calibration/coverage@25%": 0.7182050523398605,
|
|
"calibration/coverage@30%": 0.8612321694819425,
|
|
"calibration/coverage@5%": 0.28688341146053287,
|
|
"calibration/ece": 0.1691076617981678,
|
|
"calibration/mean_confidence": 0.5515990495421639,
|
|
"calibration/prompt_uniqueness": 0.8613246886758482,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01640625000000002,
|
|
"completions/max_length": 3573.0,
|
|
"completions/max_terminated_length": 3573.0,
|
|
"completions/mean_length": 642.81328125,
|
|
"completions/mean_terminated_length": 653.6934326171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 206.4,
|
|
"epoch": 0.39599505006187424,
|
|
"grad_norm": 0.00042818221845664084,
|
|
"learning_rate": 1.2951807228915664e-06,
|
|
"loss": -0.012,
|
|
"num_tokens": 329655991.0,
|
|
"reward": 0.9806491017341614,
|
|
"reward_std": 0.12677238285541534,
|
|
"rewards/accuracy_reward": 0.6590277671813964,
|
|
"rewards/brier_reward": 0.7924768686294555,
|
|
"rewards/confidence_uniqueness_reward": 0.9346436381340026,
|
|
"rewards/format_reward": 0.983506953716278,
|
|
"rewards/frontier_aurc_reward": -0.001722504827193916,
|
|
"rewards/frontier_coverage_0": 0.028906658757478,
|
|
"rewards/frontier_coverage_1": 0.028906658757478,
|
|
"rewards/frontier_coverage_10": 0.028906658757478,
|
|
"rewards/frontier_coverage_15": 0.031030337116681038,
|
|
"rewards/frontier_coverage_20": 0.0429856464266777,
|
|
"rewards/frontier_coverage_25": 0.10405687540769577,
|
|
"rewards/frontier_coverage_5": 0.028906658757478,
|
|
"rewards/frontier_ece_reward": 0.003673038515262306,
|
|
"rewards/frontier_entropy_batch_reward": -0.17347353994846343,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14384765326976776,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19444444444444445,
|
|
"signal/accuracy_reward/group_std_mean": 0.1913081645965576,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.44444444179534914,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07192382663488388,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07192382663488388,
|
|
"signal/advantage_abs_mean": 0.09211225062608719,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09211225062608719,
|
|
"signal/advantage_pre_scale_std": 0.15569303929805756,
|
|
"signal/advantage_std": 0.15569303929805756,
|
|
"signal/brier_reward/centered_abs_mean": 0.1462089329957962,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8246527777777779,
|
|
"signal/brier_reward/group_std_mean": 0.18975663781166077,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014620893821120261,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014620893821120261,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03635745905339718,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.820486111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05932655856013298,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003635745914652944,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003635745914652944,
|
|
"signal/format_reward/centered_abs_mean": 0.02596571184694767,
|
|
"signal/format_reward/group_bin_occupancy": 0.1486111111111111,
|
|
"signal/format_reward/group_std_mean": 0.04700228720903397,
|
|
"signal/format_reward/group_zero_std_frac": 0.8111111164093018,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012982855923473835,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012982855923473835,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002158830175176263,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6850694444444445,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0039873755071312186,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6985378281096927e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6985378281096927e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19089278280735017,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8347222222222224,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2473309278488159,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023861598689109086,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023861598689109086,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19089278280735017,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8347222222222224,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2473309278488159,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023861598689109086,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023861598689109086,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19089278280735017,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8347222222222224,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2473309278488159,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023861598689109086,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023861598689109086,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1793476462364197,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8326388888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2329329788684845,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002241845661774278,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002241845661774278,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06690727174282074,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9097222222222221,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08647293150424958,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008363408851437271,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008363408851437271,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0914057046175003,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9003472222222222,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1181455373764038,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011425713310018182,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011425713310018182,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19089278280735017,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8347222222222224,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2473309278488159,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023861598689109086,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023861598689109086,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.022229710966348647,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7388888888888889,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02815890610218048,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022229711525142194,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022229711525142194,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23444273173809052,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.775,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.30144866108894347,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02344427481293678,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02344427481293678,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1350255119426556,
|
|
"calibration/batch_distribution_entropy": 0.9467153136348341,
|
|
"calibration/batch_entropy_100bins": 0.9448354422616114,
|
|
"calibration/batch_entropy_10bins": 0.9467153136348341,
|
|
"calibration/batch_entropy_50bins": 0.9524946276164183,
|
|
"calibration/batch_uniqueness": 0.9455724587067573,
|
|
"calibration/buffer_distribution_entropy": 0.9863591468776306,
|
|
"calibration/buffer_entropy_100bins": 0.9924262206930579,
|
|
"calibration/buffer_entropy_10bins": 0.9863591468776306,
|
|
"calibration/buffer_entropy_50bins": 0.9916857227941385,
|
|
"calibration/confidence_entropy": 0.4943049964540047,
|
|
"calibration/coverage@0%": 0.10312881092467825,
|
|
"calibration/coverage@1%": 0.10312881092467825,
|
|
"calibration/coverage@10%": 0.46204870130918313,
|
|
"calibration/coverage@15%": 0.5477847318292854,
|
|
"calibration/coverage@20%": 0.7477854206561981,
|
|
"calibration/coverage@25%": 0.833632965941813,
|
|
"calibration/coverage@30%": 0.9413540434344725,
|
|
"calibration/coverage@5%": 0.3250324397385173,
|
|
"calibration/ece": 0.13538983337137994,
|
|
"calibration/mean_confidence": 0.6091723019901613,
|
|
"calibration/prompt_uniqueness": 0.8639295314719903,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011979166666666652,
|
|
"completions/max_length": 3396.8,
|
|
"completions/max_terminated_length": 3396.8,
|
|
"completions/mean_length": 623.0584350585938,
|
|
"completions/mean_terminated_length": 630.6281982421875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 185.2,
|
|
"epoch": 0.4079949000637492,
|
|
"grad_norm": 0.00041778094600886106,
|
|
"learning_rate": 1.1445783132530121e-06,
|
|
"loss": -0.0105,
|
|
"num_tokens": 339922808.0,
|
|
"reward": 1.0096432447433472,
|
|
"reward_std": 0.1250714048743248,
|
|
"rewards/accuracy_reward": 0.7217013955116272,
|
|
"rewards/brier_reward": 0.8008638501167298,
|
|
"rewards/confidence_uniqueness_reward": 0.9373578429222107,
|
|
"rewards/format_reward": 0.9880208373069763,
|
|
"rewards/frontier_aurc_reward": -0.00128103963797912,
|
|
"rewards/frontier_coverage_0": -0.011224100925028324,
|
|
"rewards/frontier_coverage_1": -0.011224100925028324,
|
|
"rewards/frontier_coverage_10": -0.011125411931425333,
|
|
"rewards/frontier_coverage_15": 0.000582283828407526,
|
|
"rewards/frontier_coverage_20": 0.05009397864341736,
|
|
"rewards/frontier_coverage_25": 0.1337550863623619,
|
|
"rewards/frontier_coverage_5": -0.011224100925028324,
|
|
"rewards/frontier_ece_reward": -0.0010974591568810865,
|
|
"rewards/frontier_entropy_batch_reward": -0.2065970182418823,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14814453125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19479166666666667,
|
|
"signal/accuracy_reward/group_std_mean": 0.19610781967639923,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.44166667461395265,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.074072265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.074072265625,
|
|
"signal/advantage_abs_mean": 0.0921988844871521,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0921988844871521,
|
|
"signal/advantage_pre_scale_std": 0.15291462242603301,
|
|
"signal/advantage_std": 0.15291462242603301,
|
|
"signal/brier_reward/centered_abs_mean": 0.14049543142318727,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8541666666666666,
|
|
"signal/brier_reward/group_std_mean": 0.17984696626663207,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01404954344034195,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01404954344034195,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0325088482350111,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.834375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05347518250346184,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032508848700672386,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032508848700672386,
|
|
"signal/format_reward/centered_abs_mean": 0.020540364272892474,
|
|
"signal/format_reward/group_bin_occupancy": 0.1454861111111111,
|
|
"signal/format_reward/group_std_mean": 0.039224734902381896,
|
|
"signal/format_reward/group_zero_std_frac": 0.8361111164093018,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010270182136446237,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010270182136446237,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017802180489525199,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6770833333333333,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003222810197621584,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2252726193983107e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2252726193983107e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1828035831451416,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8583333333333334,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2377503514289856,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022850447334349156,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022850447334349156,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1828035831451416,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8583333333333334,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2377503514289856,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022850447334349156,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022850447334349156,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18264002799987794,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8586805555555556,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2375439763069153,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022830002941191196,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022830002941191196,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15461563766002656,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8496527777777778,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20210520327091216,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019326955080032349,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019326955080032349,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06322543397545814,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9326388888888889,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08029639273881913,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007903179153800011,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007903179153800011,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.101860611140728,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9097222222222221,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1299730733036995,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012732576811686157,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012732576811686157,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1828035831451416,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8583333333333334,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2377503514289856,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022850447334349156,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022850447334349156,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02175499051809311,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7347222222222223,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02739621587097645,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021754990331828592,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021754990331828592,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25911190211772916,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7975694444444444,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3297302842140198,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02591119073331356,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02591119073331356,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.11596802537585997,
|
|
"calibration/batch_distribution_entropy": 0.9772162270629645,
|
|
"calibration/batch_entropy_100bins": 0.9590083538748349,
|
|
"calibration/batch_entropy_10bins": 0.9772162270629645,
|
|
"calibration/batch_entropy_50bins": 0.9696121620848036,
|
|
"calibration/batch_uniqueness": 0.9508310218025986,
|
|
"calibration/buffer_distribution_entropy": 0.985610419410986,
|
|
"calibration/buffer_entropy_100bins": 0.9920422218752784,
|
|
"calibration/buffer_entropy_10bins": 0.985610419410986,
|
|
"calibration/buffer_entropy_50bins": 0.9912291646995455,
|
|
"calibration/confidence_entropy": 0.4961519915892487,
|
|
"calibration/coverage@0%": 0.08600478646452267,
|
|
"calibration/coverage@1%": 0.08600478646452267,
|
|
"calibration/coverage@10%": 0.4732605731669655,
|
|
"calibration/coverage@15%": 0.7201678681063644,
|
|
"calibration/coverage@20%": 0.8435295212461836,
|
|
"calibration/coverage@25%": 0.9484174474094432,
|
|
"calibration/coverage@30%": 0.9905013192612138,
|
|
"calibration/coverage@5%": 0.30627206208368407,
|
|
"calibration/ece": 0.18982087542522702,
|
|
"calibration/mean_confidence": 0.5509683002177224,
|
|
"calibration/prompt_uniqueness": 0.8649400485298095,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013888888888888905,
|
|
"completions/max_length": 3743.8,
|
|
"completions/max_terminated_length": 3743.8,
|
|
"completions/mean_length": 650.516845703125,
|
|
"completions/mean_terminated_length": 659.7272583007813,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 172.4,
|
|
"epoch": 0.4199947500656242,
|
|
"grad_norm": 0.0004142906400375068,
|
|
"learning_rate": 9.93975903614458e-07,
|
|
"loss": -0.0114,
|
|
"num_tokens": 350524730.0,
|
|
"reward": 1.0014106631278992,
|
|
"reward_std": 0.12218387722969055,
|
|
"rewards/accuracy_reward": 0.7021701455116272,
|
|
"rewards/brier_reward": 0.7892498970031738,
|
|
"rewards/confidence_uniqueness_reward": 0.9369961380958557,
|
|
"rewards/format_reward": 0.9857638835906982,
|
|
"rewards/frontier_aurc_reward": -0.001478305645287037,
|
|
"rewards/frontier_coverage_0": -0.006911272555589676,
|
|
"rewards/frontier_coverage_1": -0.006911272555589676,
|
|
"rewards/frontier_coverage_10": -0.006758286617696285,
|
|
"rewards/frontier_coverage_15": 0.0018855141475796699,
|
|
"rewards/frontier_coverage_20": 0.05244411379098892,
|
|
"rewards/frontier_coverage_25": 0.13218926042318344,
|
|
"rewards/frontier_coverage_5": -0.006911272555589676,
|
|
"rewards/frontier_ece_reward": -0.00075130017939955,
|
|
"rewards/frontier_entropy_batch_reward": -0.1707520604133606,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1440049946308136,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19618055555555555,
|
|
"signal/accuracy_reward/group_std_mean": 0.19517480432987214,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43055556416511537,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0720024973154068,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0720024973154068,
|
|
"signal/advantage_abs_mean": 0.08900740891695022,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08900740891695022,
|
|
"signal/advantage_pre_scale_std": 0.15330225825309754,
|
|
"signal/advantage_std": 0.15330225825309754,
|
|
"signal/brier_reward/centered_abs_mean": 0.14266086518764495,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8486111111111111,
|
|
"signal/brier_reward/group_std_mean": 0.18242388367652893,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01426608581095934,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01426608581095934,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03368383906781673,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8496527777777777,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.052182822674512866,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003368384018540382,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003368384018540382,
|
|
"signal/format_reward/centered_abs_mean": 0.022667100839316844,
|
|
"signal/format_reward/group_bin_occupancy": 0.14340277777777777,
|
|
"signal/format_reward/group_std_mean": 0.03894899114966392,
|
|
"signal/format_reward/group_zero_std_frac": 0.8527777910232544,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011333550419658422,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.011333550419658422,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019595161313191055,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6739583333333332,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003604071820154786,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.449395142321009e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.449395142321009e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18813599050045013,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8409722222222221,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24627106189727782,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023517000023275613,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023517000023275613,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18813599050045013,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8409722222222221,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24627106189727782,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023517000023275613,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023517000023275613,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18753766417503356,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8399305555555555,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24552586376667024,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002344220783561468,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002344220783561468,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14854539334774017,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.834375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19552876353263854,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018568174680694937,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018568174680694937,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06628052592277527,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9263888888888889,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08446042537689209,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008285066462121904,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008285066462121904,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10350020378828048,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8947916666666668,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13349340260028839,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012937525752931833,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012937525752931833,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18813599050045013,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8409722222222221,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24627106189727782,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023517000023275613,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023517000023275613,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.021230778843164443,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7291666666666667,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02696690522134304,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002123078005388379,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002123078005388379,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2259067177772522,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.775,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.28947545886039733,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022590672224760057,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022590672224760057,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.10141793628292742,
|
|
"calibration/batch_distribution_entropy": 0.97102851134233,
|
|
"calibration/batch_entropy_100bins": 0.9559282496757466,
|
|
"calibration/batch_entropy_10bins": 0.97102851134233,
|
|
"calibration/batch_entropy_50bins": 0.9685106670847736,
|
|
"calibration/batch_uniqueness": 0.9508384189223701,
|
|
"calibration/buffer_distribution_entropy": 0.9859982317210336,
|
|
"calibration/buffer_entropy_100bins": 0.9922246706460343,
|
|
"calibration/buffer_entropy_10bins": 0.9859982317210336,
|
|
"calibration/buffer_entropy_50bins": 0.9914588219746386,
|
|
"calibration/confidence_entropy": 0.49898035500245025,
|
|
"calibration/coverage@0%": 0.06338339682072068,
|
|
"calibration/coverage@1%": 0.13005006348738732,
|
|
"calibration/coverage@10%": 0.5578450042867328,
|
|
"calibration/coverage@15%": 0.8041507799553624,
|
|
"calibration/coverage@20%": 0.9102453475329633,
|
|
"calibration/coverage@25%": 0.9551202557445988,
|
|
"calibration/coverage@30%": 0.9805774278215222,
|
|
"calibration/coverage@5%": 0.2749898127789967,
|
|
"calibration/ece": 0.1917915387935666,
|
|
"calibration/mean_confidence": 0.5808151819529532,
|
|
"calibration/prompt_uniqueness": 0.8569296263730571,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014409722222222232,
|
|
"completions/max_length": 3805.4,
|
|
"completions/max_terminated_length": 3805.4,
|
|
"completions/mean_length": 637.946533203125,
|
|
"completions/mean_terminated_length": 647.2637451171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 199.8,
|
|
"epoch": 0.4319946000674992,
|
|
"grad_norm": 0.0003819975827354938,
|
|
"learning_rate": 8.433734939759036e-07,
|
|
"loss": -0.0126,
|
|
"num_tokens": 360973842.0,
|
|
"reward": 1.0002532005310059,
|
|
"reward_std": 0.12911737263202666,
|
|
"rewards/accuracy_reward": 0.7065972208976745,
|
|
"rewards/brier_reward": 0.7953470587730408,
|
|
"rewards/confidence_uniqueness_reward": 0.9346136093139649,
|
|
"rewards/format_reward": 0.9855902910232544,
|
|
"rewards/frontier_aurc_reward": -0.0019531417870894074,
|
|
"rewards/frontier_coverage_0": -0.002460658084601164,
|
|
"rewards/frontier_coverage_1": -0.002460658084601164,
|
|
"rewards/frontier_coverage_10": -0.002041639015078545,
|
|
"rewards/frontier_coverage_15": 0.008680144883692264,
|
|
"rewards/frontier_coverage_20": 0.06160227060317993,
|
|
"rewards/frontier_coverage_25": 0.1450663238763809,
|
|
"rewards/frontier_coverage_5": -0.002460658084601164,
|
|
"rewards/frontier_ece_reward": -0.0004450877895578742,
|
|
"rewards/frontier_entropy_batch_reward": -0.21341712474823,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15309244394302368,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19340277777777778,
|
|
"signal/accuracy_reward/group_std_mean": 0.19739371538162231,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.45277778506278993,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07654622197151184,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07654622197151184,
|
|
"signal/advantage_abs_mean": 0.09620479941368103,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09620479941368103,
|
|
"signal/advantage_pre_scale_std": 0.16078065931797028,
|
|
"signal/advantage_std": 0.16078065931797028,
|
|
"signal/brier_reward/centered_abs_mean": 0.13989888727664948,
|
|
"signal/brier_reward/group_bin_occupancy": 0.835763888888889,
|
|
"signal/brier_reward/group_std_mean": 0.18042805790901184,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013989889249205589,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013989889249205589,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03554730340838432,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8277777777777778,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05846796631813049,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003554730489850044,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003554730489850044,
|
|
"signal/format_reward/centered_abs_mean": 0.02422960065305233,
|
|
"signal/format_reward/group_bin_occupancy": 0.1482638888888889,
|
|
"signal/format_reward/group_std_mean": 0.04518317058682442,
|
|
"signal/format_reward/group_zero_std_frac": 0.8138888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012114800326526166,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012114800326526166,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024452964775264264,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004462533164769411,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.056620480492711e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.056620480492711e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18128202557563783,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8354166666666668,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23500166237354278,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002266025450080633,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002266025450080633,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18128202557563783,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8354166666666668,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23500166237354278,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002266025450080633,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002266025450080633,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17996532022953032,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8336805555555555,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23336804807186126,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002249566651880741,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002249566651880741,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11691176444292069,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8347222222222224,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15411899387836456,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00146139704156667,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00146139704156667,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06802540868520737,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.928125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08644652813673019,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008503176271915436,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008503176271915436,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11125858575105667,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8972222222222221,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1438766151666641,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013907323591411114,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013907323591411114,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18128202557563783,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8354166666666668,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23500166237354278,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002266025450080633,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002266025450080633,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.020656683668494224,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7170138888888888,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02589767798781395,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020656683016568424,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020656683016568424,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2591008573770523,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7864583333333333,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32451775670051575,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025910085812211037,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025910085812211037,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1956443021676986,
|
|
"calibration/batch_distribution_entropy": 0.9721898543770948,
|
|
"calibration/batch_entropy_100bins": 0.9582683246927992,
|
|
"calibration/batch_entropy_10bins": 0.9721898543770948,
|
|
"calibration/batch_entropy_50bins": 0.9690488174101344,
|
|
"calibration/batch_uniqueness": 0.9514666032368121,
|
|
"calibration/buffer_distribution_entropy": 0.9854666800775806,
|
|
"calibration/buffer_entropy_100bins": 0.9919698091057843,
|
|
"calibration/buffer_entropy_10bins": 0.9854666800775806,
|
|
"calibration/buffer_entropy_50bins": 0.991146072484287,
|
|
"calibration/confidence_entropy": 0.5068397741253167,
|
|
"calibration/coverage@0%": 0.013618119773481246,
|
|
"calibration/coverage@1%": 0.013618119773481246,
|
|
"calibration/coverage@10%": 0.1095949729010646,
|
|
"calibration/coverage@15%": 0.5379352359546358,
|
|
"calibration/coverage@20%": 0.6796718053797843,
|
|
"calibration/coverage@25%": 0.8897106640947919,
|
|
"calibration/coverage@30%": 0.9510526315789474,
|
|
"calibration/coverage@5%": 0.02723068521850743,
|
|
"calibration/ece": 0.2253786384553338,
|
|
"calibration/mean_confidence": 0.5595226877564153,
|
|
"calibration/prompt_uniqueness": 0.8615784712587196,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012152777777777745,
|
|
"completions/max_length": 3242.0,
|
|
"completions/max_terminated_length": 3242.0,
|
|
"completions/mean_length": 643.6135498046875,
|
|
"completions/mean_terminated_length": 651.6073974609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 184.8,
|
|
"epoch": 0.44399445006937416,
|
|
"grad_norm": 0.00043083218042738736,
|
|
"learning_rate": 6.927710843373495e-07,
|
|
"loss": -0.0099,
|
|
"num_tokens": 371478318.0,
|
|
"reward": 0.9993168830871582,
|
|
"reward_std": 0.1252796620130539,
|
|
"rewards/accuracy_reward": 0.6919270873069763,
|
|
"rewards/brier_reward": 0.7822542548179626,
|
|
"rewards/confidence_uniqueness_reward": 0.940218985080719,
|
|
"rewards/format_reward": 0.9877604126930237,
|
|
"rewards/frontier_aurc_reward": -0.001636920589953661,
|
|
"rewards/frontier_coverage_0": -0.009225619398057461,
|
|
"rewards/frontier_coverage_1": -0.009225619398057461,
|
|
"rewards/frontier_coverage_10": -0.00855890940874815,
|
|
"rewards/frontier_coverage_15": 0.014033466950058937,
|
|
"rewards/frontier_coverage_20": 0.05618218407034874,
|
|
"rewards/frontier_coverage_25": 0.12737512439489365,
|
|
"rewards/frontier_coverage_5": -0.009225619398057461,
|
|
"rewards/frontier_ece_reward": -0.0031481004785746335,
|
|
"rewards/frontier_entropy_batch_reward": -0.14455921649932862,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15904405415058137,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19548611111111108,
|
|
"signal/accuracy_reward/group_std_mean": 0.20459264814853667,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43611111044883727,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07952202707529069,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07952202707529069,
|
|
"signal/advantage_abs_mean": 0.0955008551478386,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0955008551478386,
|
|
"signal/advantage_pre_scale_std": 0.1540976881980896,
|
|
"signal/advantage_std": 0.1540976881980896,
|
|
"signal/brier_reward/centered_abs_mean": 0.14529342353343963,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8604166666666666,
|
|
"signal/brier_reward/group_std_mean": 0.18353629410266875,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014529342763125896,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014529342763125896,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03009340800344944,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8524305555555556,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.046408722549676894,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030093408189713956,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030093408189713956,
|
|
"signal/format_reward/centered_abs_mean": 0.01939561627805233,
|
|
"signal/format_reward/group_bin_occupancy": 0.14131944444444441,
|
|
"signal/format_reward/group_std_mean": 0.03335440866649151,
|
|
"signal/format_reward/group_zero_std_frac": 0.8694444417953491,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009697808139026164,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009697808139026164,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020065686898306013,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6559027777777777,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0037283867597579954,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.508210891392082e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.508210891392082e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1982041120529175,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8604166666666666,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2535953104496002,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024775514844805,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024775514844805,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1982041120529175,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8604166666666666,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2535953104496002,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024775514844805,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024775514844805,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.195833483338356,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8583333333333332,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2506587952375412,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024479186162352563,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024479186162352563,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10796615332365037,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8600694444444444,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.14121497869491578,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001349576935172081,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001349576935172081,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06482557505369187,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.91875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0837198704481125,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008103197091259062,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008103197091259062,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10678046792745591,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1394643157720566,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001334755914285779,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001334755914285779,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1982041120529175,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8604166666666666,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2535953104496002,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024775514844805,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024775514844805,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02167145274579525,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7135416666666667,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.026740428060293198,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021671453956514596,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021671453956514596,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2169477492570877,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7670138888888889,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.28880282044410704,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02169477492570877,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02169477492570877,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16203958912445476,
|
|
"calibration/batch_distribution_entropy": 0.9564638809055342,
|
|
"calibration/batch_entropy_100bins": 0.9504243671299063,
|
|
"calibration/batch_entropy_10bins": 0.9564638809055342,
|
|
"calibration/batch_entropy_50bins": 0.9588082852441188,
|
|
"calibration/batch_uniqueness": 0.9485751236738553,
|
|
"calibration/buffer_distribution_entropy": 0.9855318683814028,
|
|
"calibration/buffer_entropy_100bins": 0.9920046840716885,
|
|
"calibration/buffer_entropy_10bins": 0.9855318683814028,
|
|
"calibration/buffer_entropy_50bins": 0.9911690583473515,
|
|
"calibration/confidence_entropy": 0.5012853403916044,
|
|
"calibration/coverage@0%": 0.05780618281758908,
|
|
"calibration/coverage@1%": 0.05780618281758908,
|
|
"calibration/coverage@10%": 0.31838293083869723,
|
|
"calibration/coverage@15%": 0.4790459460921778,
|
|
"calibration/coverage@20%": 0.5617222445744519,
|
|
"calibration/coverage@25%": 0.9087071261402514,
|
|
"calibration/coverage@30%": 0.9716345096745822,
|
|
"calibration/coverage@5%": 0.1443786218306891,
|
|
"calibration/ece": 0.17111073876394964,
|
|
"calibration/mean_confidence": 0.6004809067734467,
|
|
"calibration/prompt_uniqueness": 0.8665386519176552,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009461805555555536,
|
|
"completions/max_length": 3465.6,
|
|
"completions/max_terminated_length": 3465.6,
|
|
"completions/mean_length": 627.6765625,
|
|
"completions/mean_terminated_length": 633.6748168945312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 181.6,
|
|
"epoch": 0.45599430007124914,
|
|
"grad_norm": 0.0004201448755338788,
|
|
"learning_rate": 5.421686746987952e-07,
|
|
"loss": -0.0078,
|
|
"num_tokens": 381792096.0,
|
|
"reward": 1.016054892539978,
|
|
"reward_std": 0.12401713877916336,
|
|
"rewards/accuracy_reward": 0.7311631917953492,
|
|
"rewards/brier_reward": 0.800844419002533,
|
|
"rewards/confidence_uniqueness_reward": 0.9397600173950196,
|
|
"rewards/format_reward": 0.9903645753860474,
|
|
"rewards/frontier_aurc_reward": -0.0016162074403837322,
|
|
"rewards/frontier_coverage_0": -0.015276820957660675,
|
|
"rewards/frontier_coverage_1": -0.015276820957660675,
|
|
"rewards/frontier_coverage_10": -0.014098763652145862,
|
|
"rewards/frontier_coverage_15": 0.01533528920263052,
|
|
"rewards/frontier_coverage_20": 0.07684787213802338,
|
|
"rewards/frontier_coverage_25": 0.1640935003757477,
|
|
"rewards/frontier_coverage_5": -0.015273858606815339,
|
|
"rewards/frontier_ece_reward": -0.0036209038575179876,
|
|
"rewards/frontier_entropy_batch_reward": -0.20841516852378844,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15256619155406953,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19826388888888888,
|
|
"signal/accuracy_reward/group_std_mean": 0.20278047025203705,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4138888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07628309577703477,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07628309577703477,
|
|
"signal/advantage_abs_mean": 0.09019981622695923,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09019981622695923,
|
|
"signal/advantage_pre_scale_std": 0.15100550949573516,
|
|
"signal/advantage_std": 0.15100550949573516,
|
|
"signal/brier_reward/centered_abs_mean": 0.13891661763191224,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8350694444444444,
|
|
"signal/brier_reward/group_std_mean": 0.17901506423950195,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01389166172593832,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01389166172593832,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028540104255080224,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8395833333333333,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04933372884988785,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028540104161947966,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028540104161947966,
|
|
"signal/format_reward/centered_abs_mean": 0.01725802943110466,
|
|
"signal/format_reward/group_bin_occupancy": 0.14479166666666668,
|
|
"signal/format_reward/group_std_mean": 0.03579398356378079,
|
|
"signal/format_reward/group_zero_std_frac": 0.8416666746139526,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00862901471555233,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00862901471555233,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020792306633666156,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6552083333333333,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0039239289239048954,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5990382710006088e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5990382710006088e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18381263613700866,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8267361111111111,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23998367488384248,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022976579144597053,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022976579144597053,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18381263613700866,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8267361111111111,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23998367488384248,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022976579144597053,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022976579144597053,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1811767816543579,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8246527777777779,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2366650640964508,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022647099569439886,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022647099569439886,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09005323797464371,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8552083333333333,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11996055394411087,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011256654281169177,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011256654281169177,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07242253422737122,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.923611111111111,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09198382496833801,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009052816778421402,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009052816778421402,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11942882239818572,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8909722222222222,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15319141745567322,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014928602380678059,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014928602380678059,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18380914330482484,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8267361111111111,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23997901380062103,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002297614235430956,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002297614235430956,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02063850834965706,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6902777777777778,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02570592537522316,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002063850755803287,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002063850755803287,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2538691431283951,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7777777777777779,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3231283605098724,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025386914610862732,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025386914610862732,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1961372764660094,
|
|
"calibration/batch_distribution_entropy": 0.9787626043754896,
|
|
"calibration/batch_entropy_100bins": 0.9610797936801457,
|
|
"calibration/batch_entropy_10bins": 0.9787626043754896,
|
|
"calibration/batch_entropy_50bins": 0.9740262499384797,
|
|
"calibration/batch_uniqueness": 0.952937991093231,
|
|
"calibration/buffer_distribution_entropy": 0.9854658504578813,
|
|
"calibration/buffer_entropy_100bins": 0.9919799868819045,
|
|
"calibration/buffer_entropy_10bins": 0.9854658504578813,
|
|
"calibration/buffer_entropy_50bins": 0.9911334418833307,
|
|
"calibration/confidence_entropy": 0.4938269497363332,
|
|
"calibration/coverage@0%": 0.017848624480017818,
|
|
"calibration/coverage@1%": 0.017848624480017818,
|
|
"calibration/coverage@10%": 0.3216058794764497,
|
|
"calibration/coverage@15%": 0.44077104008081436,
|
|
"calibration/coverage@20%": 0.5248942584977563,
|
|
"calibration/coverage@25%": 0.7557016752611363,
|
|
"calibration/coverage@30%": 0.8214413911949145,
|
|
"calibration/coverage@5%": 0.14017045767584957,
|
|
"calibration/ece": 0.17108793119108695,
|
|
"calibration/mean_confidence": 0.5567763002783004,
|
|
"calibration/prompt_uniqueness": 0.8632525025551473,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012152777777777768,
|
|
"completions/max_length": 3528.2,
|
|
"completions/max_terminated_length": 3528.2,
|
|
"completions/mean_length": 654.6660522460937,
|
|
"completions/mean_terminated_length": 662.805517578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 188.2,
|
|
"epoch": 0.46799415007312406,
|
|
"grad_norm": 0.0004228286852594465,
|
|
"learning_rate": 3.91566265060241e-07,
|
|
"loss": -0.0096,
|
|
"num_tokens": 392414713.0,
|
|
"reward": 0.9901637196540832,
|
|
"reward_std": 0.12346882373094559,
|
|
"rewards/accuracy_reward": 0.6758680582046509,
|
|
"rewards/brier_reward": 0.7898530125617981,
|
|
"rewards/confidence_uniqueness_reward": 0.9386414170265198,
|
|
"rewards/format_reward": 0.9878472208976745,
|
|
"rewards/frontier_aurc_reward": -0.0017654816154390573,
|
|
"rewards/frontier_coverage_0": 0.008542282739654183,
|
|
"rewards/frontier_coverage_1": 0.008542282739654183,
|
|
"rewards/frontier_coverage_10": 0.00909471595659852,
|
|
"rewards/frontier_coverage_15": 0.0248194869607687,
|
|
"rewards/frontier_coverage_20": 0.07406894192099571,
|
|
"rewards/frontier_coverage_25": 0.14769483357667923,
|
|
"rewards/frontier_coverage_5": 0.008551673218607902,
|
|
"rewards/frontier_ece_reward": -0.0010886201984249056,
|
|
"rewards/frontier_entropy_batch_reward": -0.1792888253927231,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14333767145872117,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.196875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1954125940799713,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.425,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07166883572936059,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07166883572936059,
|
|
"signal/advantage_abs_mean": 0.0889064148068428,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0889064148068428,
|
|
"signal/advantage_pre_scale_std": 0.14890001118183135,
|
|
"signal/advantage_std": 0.14890001118183135,
|
|
"signal/brier_reward/centered_abs_mean": 0.14416175484657287,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8413194444444445,
|
|
"signal/brier_reward/group_std_mean": 0.18464682400226592,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01441617514938116,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01441617514938116,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03026603311300278,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8399305555555555,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.050313469022512436,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003026603301987052,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003026603301987052,
|
|
"signal/format_reward/centered_abs_mean": 0.019162326864898205,
|
|
"signal/format_reward/group_bin_occupancy": 0.14479166666666665,
|
|
"signal/format_reward/group_std_mean": 0.036996308341622354,
|
|
"signal/format_reward/group_zero_std_frac": 0.8416666746139526,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009581163432449103,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009581163432449103,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021472658263519406,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6767361111111111,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038169473875313996,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.684082428459078e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.684082428459078e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1832861989736557,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.845486111111111,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24093341827392578,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022910774918273092,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022910774918273092,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1832861989736557,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.845486111111111,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24093341827392578,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022910774918273092,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022910774918273092,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18060127198696135,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.84375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2375454157590866,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022575160022825004,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022575160022825004,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08088361173868179,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.867013888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10819252133369446,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010110451141372323,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010110451141372323,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07415172904729843,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9243055555555555,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09439714550971985,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009268965688534081,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009268965688534081,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11941829919815064,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8850694444444445,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15383650064468385,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014927288517355918,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014927288517355918,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18320149779319764,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.845486111111111,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24082353413105012,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002290018741041422,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002290018741041422,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01998976320028305,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6802083333333333,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02510824017226696,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001998976385220885,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001998976385220885,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23103305995464324,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7687499999999999,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.30060619711875913,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023103305697441102,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.023103305697441102,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.14460988695922733,
|
|
"calibration/batch_distribution_entropy": 0.9495513680360439,
|
|
"calibration/batch_entropy_100bins": 0.948140998369011,
|
|
"calibration/batch_entropy_10bins": 0.9495513680360439,
|
|
"calibration/batch_entropy_50bins": 0.9562598495698762,
|
|
"calibration/batch_uniqueness": 0.9474501559632424,
|
|
"calibration/buffer_distribution_entropy": 0.9853301528282403,
|
|
"calibration/buffer_entropy_100bins": 0.9919136390750427,
|
|
"calibration/buffer_entropy_10bins": 0.9853301528282403,
|
|
"calibration/buffer_entropy_50bins": 0.9910459470294087,
|
|
"calibration/confidence_entropy": 0.5112296963776538,
|
|
"calibration/coverage@0%": 0.032981623612241454,
|
|
"calibration/coverage@1%": 0.032981623612241454,
|
|
"calibration/coverage@10%": 0.46781779581637994,
|
|
"calibration/coverage@15%": 0.5944394938795089,
|
|
"calibration/coverage@20%": 0.7251206752410088,
|
|
"calibration/coverage@25%": 0.9559466293867314,
|
|
"calibration/coverage@30%": 0.9758530183727034,
|
|
"calibration/coverage@5%": 0.08477990939682992,
|
|
"calibration/ece": 0.18029392485104195,
|
|
"calibration/mean_confidence": 0.6085970431868855,
|
|
"calibration/prompt_uniqueness": 0.867308580953482,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009548611111111138,
|
|
"completions/max_length": 3404.4,
|
|
"completions/max_terminated_length": 3404.4,
|
|
"completions/mean_length": 633.7380249023438,
|
|
"completions/mean_terminated_length": 639.8541870117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 195.2,
|
|
"epoch": 0.47999400007499904,
|
|
"grad_norm": 0.00048563332529738545,
|
|
"learning_rate": 2.409638554216868e-07,
|
|
"loss": -0.0069,
|
|
"num_tokens": 402783183.0,
|
|
"reward": 1.0019099831581115,
|
|
"reward_std": 0.12241675555706025,
|
|
"rewards/accuracy_reward": 0.7002604126930236,
|
|
"rewards/brier_reward": 0.7927653312683105,
|
|
"rewards/confidence_uniqueness_reward": 0.9402285814285278,
|
|
"rewards/format_reward": 0.9902777671813965,
|
|
"rewards/frontier_aurc_reward": -0.0022210155380889773,
|
|
"rewards/frontier_coverage_0": -0.0035953870275989173,
|
|
"rewards/frontier_coverage_1": -0.0035953870275989173,
|
|
"rewards/frontier_coverage_10": -0.0024497059057466686,
|
|
"rewards/frontier_coverage_15": 0.02303452733904123,
|
|
"rewards/frontier_coverage_20": 0.07562950998544693,
|
|
"rewards/frontier_coverage_25": 0.15210793316364288,
|
|
"rewards/frontier_coverage_5": -0.003559676537406631,
|
|
"rewards/frontier_ece_reward": -0.003188342018984258,
|
|
"rewards/frontier_entropy_batch_reward": -0.19281545877456666,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1425075948238373,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.190625,
|
|
"signal/accuracy_reward/group_std_mean": 0.18748272955417633,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.47500001192092894,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07125379741191865,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07125379741191865,
|
|
"signal/advantage_abs_mean": 0.09010151475667953,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09010151475667953,
|
|
"signal/advantage_pre_scale_std": 0.14910052120685577,
|
|
"signal/advantage_std": 0.14910052120685577,
|
|
"signal/brier_reward/centered_abs_mean": 0.14125451743602752,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8416666666666668,
|
|
"signal/brier_reward/group_std_mean": 0.1821454256772995,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014125452749431134,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014125452749431134,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028338390961289407,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.84375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04806696176528931,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002833839226514101,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002833839226514101,
|
|
"signal/format_reward/centered_abs_mean": 0.01692708358168602,
|
|
"signal/format_reward/group_bin_occupancy": 0.14375,
|
|
"signal/format_reward/group_std_mean": 0.03435967043042183,
|
|
"signal/format_reward/group_zero_std_frac": 0.85,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00846354179084301,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00846354179084301,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002712964592501521,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6565972222222222,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005087446887046099,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.391205755178817e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.391205755178817e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17483938336372376,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8496527777777777,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2283846229314804,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00218549226410687,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00218549226410687,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17483938336372376,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8496527777777777,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2283846229314804,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00218549226410687,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00218549226410687,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17156257033348082,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8479166666666667,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22431055903434755,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002144532185047865,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002144532185047865,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07161953896284104,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09553508013486862,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008952441858127713,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008952441858127713,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07612911015748977,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9177083333333332,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0976193055510521,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000951613939832896,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000951613939832896,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12673709094524382,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9010416666666666,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16289995312690736,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015842135995626449,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015842135995626449,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1747281402349472,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.85,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22824438512325287,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021841016598045824,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021841016598045824,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01971760131418705,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6673611111111111,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.024473632127046584,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001971760136075318,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001971760136075318,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2508280843496323,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7565972222222223,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3208978533744812,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02508280873298645,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02508280873298645,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.47999400007499904,
|
|
"eval_calibration/aurc": 0.1313149807340917,
|
|
"eval_calibration/batch_distribution_entropy": 0.9010256730205485,
|
|
"eval_calibration/batch_entropy_100bins": 0.705057482475523,
|
|
"eval_calibration/batch_entropy_10bins": 0.9010256730205485,
|
|
"eval_calibration/batch_entropy_50bins": 0.7709351303877786,
|
|
"eval_calibration/batch_uniqueness": 0.8971089956208811,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9850070003261059,
|
|
"eval_calibration/buffer_entropy_100bins": 0.991778938093653,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9850070003261059,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9908763990301034,
|
|
"eval_calibration/confidence_entropy": 0.4905625738638028,
|
|
"eval_calibration/coverage@0%": 0.245127688172043,
|
|
"eval_calibration/coverage@1%": 0.245127688172043,
|
|
"eval_calibration/coverage@10%": 0.526377688172043,
|
|
"eval_calibration/coverage@15%": 0.651377688172043,
|
|
"eval_calibration/coverage@20%": 0.814516129032258,
|
|
"eval_calibration/coverage@25%": 0.9578293010752689,
|
|
"eval_calibration/coverage@30%": 0.9947916666666666,
|
|
"eval_calibration/coverage@5%": 0.245127688172043,
|
|
"eval_calibration/ece": 0.22274469707586766,
|
|
"eval_calibration/mean_confidence": 0.5956210189997321,
|
|
"eval_calibration/prompt_uniqueness": 0.8971089956208811,
|
|
"eval_completions/clipped_ratio": 0.011284722222222229,
|
|
"eval_completions/max_length": 2300.8333333333335,
|
|
"eval_completions/max_terminated_length": 2300.8333333333335,
|
|
"eval_completions/mean_length": 645.4890747070312,
|
|
"eval_completions/mean_terminated_length": 652.927968343099,
|
|
"eval_completions/min_length": 50.0,
|
|
"eval_completions/min_terminated_length": 223.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 402783183.0,
|
|
"eval_reward": 0.9437916080156962,
|
|
"eval_reward_std": 0.243720144033432,
|
|
"eval_rewards/accuracy_reward": 0.6909722288449606,
|
|
"eval_rewards/brier_reward": 0.7851507067680359,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8838565051555634,
|
|
"eval_rewards/format_reward": 0.9869791666666666,
|
|
"eval_rewards/frontier_aurc_reward": -0.0024154275791564337,
|
|
"eval_rewards/frontier_coverage_0": -0.0033070078740517297,
|
|
"eval_rewards/frontier_coverage_1": -0.0033070078740517297,
|
|
"eval_rewards/frontier_coverage_10": -0.0025324359691391387,
|
|
"eval_rewards/frontier_coverage_15": 0.024358487998445828,
|
|
"eval_rewards/frontier_coverage_20": 0.07857182746132214,
|
|
"eval_rewards/frontier_coverage_25": 0.1538891519109408,
|
|
"eval_rewards/frontier_coverage_5": -0.003267340362071991,
|
|
"eval_rewards/frontier_ece_reward": -0.002330610683808724,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.6487665772438049,
|
|
"eval_runtime": 207.9871,
|
|
"eval_samples_per_second": 4.808,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4146050314108531,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.46133896211783093,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20730251570542654,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20730251570542654,
|
|
"eval_signal/advantage_abs_mean": 0.20695754885673523,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20695754885673523,
|
|
"eval_signal/advantage_pre_scale_std": 0.24312934776147208,
|
|
"eval_signal/advantage_std": 0.24312934776147208,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.19702969988187155,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.8784722222222223,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2518209119637807,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019702970360716183,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019702970360716183,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05498677988847097,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.38888888888888884,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08798779795567195,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005498677957803011,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005498677957803011,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.024576822761446238,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.1597222222222222,
|
|
"eval_signal/format_reward/group_std_mean": 0.05818357535948356,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.7222222487131754,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.012288411380723119,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.012288411380723119,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004131359804887325,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.5694444444444444,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.009391291804301241,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.16420004714746e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.16420004714746e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.25250349193811417,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9027777777777778,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.3640611221392949,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003156293804446856,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003156293804446856,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.25250349193811417,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9027777777777778,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.3640611221392949,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003156293804446856,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003156293804446856,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.24736239512761435,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9027777777777778,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.3576079159975052,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030920300244664154,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030920300244664154,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.08761641258994739,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.8854166666666666,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.13278244932492575,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010952051864781727,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010952051864781727,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.1130032017827034,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9305555555555555,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.14612068235874176,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014125400533278782,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014125400533278782,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2178284153342247,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9270833333333334,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.2713290477792422,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027228551916778088,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027228551916778088,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2523079713185628,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9027777777777778,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.36381277441978455,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031538497811804214,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031538497811804214,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.02772780228406191,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8506944444444445,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.03577593838175138,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00277278032929947,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00277278032929947,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.322118878364563,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.28472222222222227,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.3369586815436681,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03221188889195522,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03221188889195522,
|
|
"eval_steps_per_second": 0.029,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19286525379114844,
|
|
"calibration/batch_distribution_entropy": 0.9508638400230053,
|
|
"calibration/batch_entropy_100bins": 0.9472046153786987,
|
|
"calibration/batch_entropy_10bins": 0.9508638400230053,
|
|
"calibration/batch_entropy_50bins": 0.9586811454627515,
|
|
"calibration/batch_uniqueness": 0.9478697952898288,
|
|
"calibration/buffer_distribution_entropy": 0.985099606750224,
|
|
"calibration/buffer_entropy_100bins": 0.9918358877130297,
|
|
"calibration/buffer_entropy_10bins": 0.985099606750224,
|
|
"calibration/buffer_entropy_50bins": 0.9909422179315424,
|
|
"calibration/confidence_entropy": 0.4997781015447198,
|
|
"calibration/coverage@0%": 0.01370268496669971,
|
|
"calibration/coverage@1%": 0.01370268496669971,
|
|
"calibration/coverage@10%": 0.08365004677399088,
|
|
"calibration/coverage@15%": 0.39644238421691963,
|
|
"calibration/coverage@20%": 0.611257871294732,
|
|
"calibration/coverage@25%": 0.8797210315410284,
|
|
"calibration/coverage@30%": 0.939168679577219,
|
|
"calibration/coverage@5%": 0.01370268496669971,
|
|
"calibration/ece": 0.14749019364217641,
|
|
"calibration/mean_confidence": 0.606638199869165,
|
|
"calibration/prompt_uniqueness": 0.8656049290865866,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.008767361111111116,
|
|
"completions/max_length": 3214.0,
|
|
"completions/max_terminated_length": 3214.0,
|
|
"completions/mean_length": 652.1061767578125,
|
|
"completions/mean_terminated_length": 657.898583984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 175.6,
|
|
"epoch": 0.491993850076874,
|
|
"grad_norm": 0.0003685148840304464,
|
|
"learning_rate": 9.036144578313253e-08,
|
|
"loss": -0.0055,
|
|
"num_tokens": 413361398.0,
|
|
"reward": 1.0259872198104858,
|
|
"reward_std": 0.11690017282962799,
|
|
"rewards/accuracy_reward": 0.7453993082046508,
|
|
"rewards/brier_reward": 0.7930923223495483,
|
|
"rewards/confidence_uniqueness_reward": 0.9420808672904968,
|
|
"rewards/format_reward": 0.9911458373069764,
|
|
"rewards/frontier_aurc_reward": -0.0016583121148869395,
|
|
"rewards/frontier_coverage_0": -0.030156330950558186,
|
|
"rewards/frontier_coverage_1": -0.030156330950558186,
|
|
"rewards/frontier_coverage_10": -0.028441790863871573,
|
|
"rewards/frontier_coverage_15": 0.021205396763980387,
|
|
"rewards/frontier_coverage_20": 0.08911058455705642,
|
|
"rewards/frontier_coverage_25": 0.17736924588680267,
|
|
"rewards/frontier_coverage_5": -0.0300977423787117,
|
|
"rewards/frontier_ece_reward": -0.006220728810876608,
|
|
"rewards/frontier_entropy_batch_reward": -0.17270275056362153,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14183485507965088,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1951388888888889,
|
|
"signal/accuracy_reward/group_std_mean": 0.19232783019542693,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4388888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07091742753982544,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07091742753982544,
|
|
"signal/advantage_abs_mean": 0.0845324456691742,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0845324456691742,
|
|
"signal/advantage_pre_scale_std": 0.1428141325712204,
|
|
"signal/advantage_std": 0.1428141325712204,
|
|
"signal/brier_reward/centered_abs_mean": 0.1418829470872879,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84375,
|
|
"signal/brier_reward/group_std_mean": 0.18193072974681854,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014188294671475888,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014188294671475888,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026093775033950807,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.865625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04311029836535454,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026093775872141124,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026093775872141124,
|
|
"signal/format_reward/centered_abs_mean": 0.014822048880159856,
|
|
"signal/format_reward/group_bin_occupancy": 0.140625,
|
|
"signal/format_reward/group_std_mean": 0.029389195144176483,
|
|
"signal/format_reward/group_zero_std_frac": 0.875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007411024440079928,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007411024440079928,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002050434215925634,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6666666666666667,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003803052147850394,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.563042944530025e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.563042944530025e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18497320711612703,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8427083333333334,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24077284038066865,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002312165219336748,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002312165219336748,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18497320711612703,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8427083333333334,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24077284038066865,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002312165219336748,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002312165219336748,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18142623901367189,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8402777777777777,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23639352917671203,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022678279783576727,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022678279783576727,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07308610081672669,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8972222222222221,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09623065441846848,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009135762811638415,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009135762811638415,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07907227426767349,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9204861111111112,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10045547783374786,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009884034050628542,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009884034050628542,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12448778450489044,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8934027777777779,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1596580684185028,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015560972038656472,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015560972038656472,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18486446142196655,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8427083333333334,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24063428342342377,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002310805721208453,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002310805721208453,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.020203196629881858,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6336805555555556,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02513127215206623,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020203196443617346,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020203196443617346,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.22744437754154206,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7774305555555555,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2937332093715668,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022744438052177428,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.022744438052177428,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.10604995661886611,
|
|
"calibration/batch_distribution_entropy": 0.9459823056432164,
|
|
"calibration/batch_entropy_100bins": 0.9445751943961431,
|
|
"calibration/batch_entropy_10bins": 0.9459823056432164,
|
|
"calibration/batch_entropy_50bins": 0.9524671961811779,
|
|
"calibration/batch_uniqueness": 0.945795930081056,
|
|
"calibration/buffer_distribution_entropy": 0.9843894309209239,
|
|
"calibration/buffer_entropy_100bins": 0.991489315771584,
|
|
"calibration/buffer_entropy_10bins": 0.9843894309209239,
|
|
"calibration/buffer_entropy_50bins": 0.9905221962633863,
|
|
"calibration/confidence_entropy": 0.49619946668569376,
|
|
"calibration/coverage@0%": 0.0810229902207145,
|
|
"calibration/coverage@1%": 0.0810229902207145,
|
|
"calibration/coverage@10%": 0.5336122948397576,
|
|
"calibration/coverage@15%": 0.7811456966373963,
|
|
"calibration/coverage@20%": 0.8943865740740741,
|
|
"calibration/coverage@25%": 0.9825562169312169,
|
|
"calibration/coverage@30%": 1.0,
|
|
"calibration/coverage@5%": 0.28413915588222977,
|
|
"calibration/ece": 0.13840996405365147,
|
|
"calibration/mean_confidence": 0.6308651120728337,
|
|
"calibration/prompt_uniqueness": 0.8631049911748535,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00868055555555558,
|
|
"completions/max_length": 3621.0,
|
|
"completions/max_terminated_length": 3621.0,
|
|
"completions/mean_length": 655.096923828125,
|
|
"completions/mean_terminated_length": 660.8446044921875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 188.33333333333334,
|
|
"epoch": 0.49919376007799904,
|
|
"num_tokens": 419755476.0,
|
|
"reward": 1.001288930575053,
|
|
"reward_std": 0.11777538061141968,
|
|
"rewards/accuracy_reward": 0.6956018408139547,
|
|
"rewards/brier_reward": 0.7960908611615499,
|
|
"rewards/confidence_uniqueness_reward": 0.94153360525767,
|
|
"rewards/format_reward": 0.9911747574806213,
|
|
"rewards/frontier_aurc_reward": -0.0013014324552689989,
|
|
"rewards/frontier_coverage_0": 0.00015948344177256027,
|
|
"rewards/frontier_coverage_1": 0.00015948344177256027,
|
|
"rewards/frontier_coverage_10": 0.0008107475781192383,
|
|
"rewards/frontier_coverage_15": 0.027395144725839298,
|
|
"rewards/frontier_coverage_20": 0.08610829710960388,
|
|
"rewards/frontier_coverage_25": 0.1655142605304718,
|
|
"rewards/frontier_coverage_5": 0.0002017094132800897,
|
|
"rewards/frontier_ece_reward": -0.0024814563415323696,
|
|
"rewards/frontier_entropy_batch_reward": -0.19101824859778085,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14246961971124014,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1909722222222222,
|
|
"signal/accuracy_reward/group_std_mean": 0.1864061305920283,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4722222089767456,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07123480985562007,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07123480985562007,
|
|
"signal/advantage_abs_mean": 0.08667557189861934,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08667557189861934,
|
|
"signal/advantage_pre_scale_std": 0.1438957303762436,
|
|
"signal/advantage_std": 0.1438957303762436,
|
|
"signal/brier_reward/centered_abs_mean": 0.14015839993953705,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8518518518518517,
|
|
"signal/brier_reward/group_std_mean": 0.17842622101306915,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014015840366482735,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014015840366482735,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027561215683817863,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8454861111111112,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04839188729723295,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002756121257940928,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002756121257940928,
|
|
"signal/format_reward/centered_abs_mean": 0.01621274556964636,
|
|
"signal/format_reward/group_bin_occupancy": 0.14467592592592593,
|
|
"signal/format_reward/group_std_mean": 0.034930519138773285,
|
|
"signal/format_reward/group_zero_std_frac": 0.8425925970077515,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00810637278482318,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00810637278482318,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016874617819363873,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6921296296296297,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032867664316048226,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1093272759268682e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1093272759268682e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19459756712118784,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8425925925925926,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24988562365372977,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024324696666250625,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024324696666250625,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19459756712118784,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8425925925925926,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24988562365372977,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024324696666250625,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024324696666250625,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19071337580680847,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8431712962962963,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2450977216164271,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002383917337283492,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002383917337283492,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07185898224512736,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.9074074074074074,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09349055091540019,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008982373401522636,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008982373401522636,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07390168060859044,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9201388888888888,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09523183355728786,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009237710037268698,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009237710037268698,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11694104224443436,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8831018518518517,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15213672816753387,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014617630513384938,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014617630513384938,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19448575377464294,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8425925925925926,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24974611898263296,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024310719842712083,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024310719842712083,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02061399631202221,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6296296296296297,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.025443398704131443,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00206139978642265,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00206139978642265,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24270604054133096,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7702546296296297,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3108425835768382,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0242706040541331,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0242706040541331,
|
|
"step": 208,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.009165088099857362,
|
|
"train_runtime": 38159.999,
|
|
"train_samples_per_second": 0.393,
|
|
"train_steps_per_second": 0.005
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 208,
|
|
"num_input_tokens_seen": 419755476,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 6,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|