8083 lines
490 KiB
JSON
8083 lines
490 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9984,
|
|
"eval_steps": 50,
|
|
"global_step": 312,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.6351994038421116,
|
|
"calibration/batch_distribution_entropy": 0.6512784692126155,
|
|
"calibration/confidence_entropy": 0.3468661812035868,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.49083744047999345,
|
|
"calibration/mean_confidence": 0.7901167725714044,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03408203125,
|
|
"completions/max_length": 1502.8,
|
|
"completions/max_terminated_length": 1502.8,
|
|
"completions/mean_length": 215.82197265625,
|
|
"completions/mean_terminated_length": 223.429248046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.029902346432209015,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"loss": 0.01,
|
|
"num_tokens": 17054049.0,
|
|
"reward": 0.5691495656967163,
|
|
"reward_std": 0.4165258049964905,
|
|
"rewards/accuracy_reward": 0.22001953125,
|
|
"rewards/brier_reward": 0.3759719550609589,
|
|
"rewards/confidence_uniqueness_reward": 0.4875619649887085,
|
|
"rewards/format_reward": 0.6849609375,
|
|
"rewards/frontier_coverage_0": 0.30275666117668154,
|
|
"rewards/frontier_coverage_1": 0.30275666117668154,
|
|
"rewards/frontier_coverage_10": 0.30275666117668154,
|
|
"rewards/frontier_coverage_15": 0.30275666117668154,
|
|
"rewards/frontier_coverage_20": 0.30275666117668154,
|
|
"rewards/frontier_coverage_25": 0.30275666117668154,
|
|
"rewards/frontier_coverage_5": 0.30275666117668154,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.239569091796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.28268457651138307,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.309375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1197845458984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1197845458984375,
|
|
"signal/advantage_abs_mean": 0.354982727766037,
|
|
"signal/advantage_pre_scale_abs_mean": 0.354982727766037,
|
|
"signal/advantage_pre_scale_std": 0.4236880660057068,
|
|
"signal/advantage_std": 0.4236880660057068,
|
|
"signal/brier_reward/centered_abs_mean": 0.32025502920150756,
|
|
"signal/brier_reward/group_std_mean": 0.3653526544570923,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03202550373971462,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03202550373971462,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2980002284049988,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.34901362657546997,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029800022765994073,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029800022765994073,
|
|
"signal/format_reward/centered_abs_mean": 0.40311279296875,
|
|
"signal/format_reward/group_std_mean": 0.45344988703727723,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.201556396484375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.201556396484375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2926347076892853,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.34393285512924193,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004184676380828023,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004184676380828023,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2926347076892853,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.34393285512924193,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004184676380828023,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004184676380828023,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2926347076892853,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.34393285512924193,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004184676380828023,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004184676380828023,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2926347076892853,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.34393285512924193,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004184676380828023,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004184676380828023,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2926347076892853,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.34393285512924193,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004184676380828023,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004184676380828023,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2926347076892853,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.34393285512924193,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004184676380828023,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004184676380828023,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2926347076892853,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.34393285512924193,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004184676380828023,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004184676380828023,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6689473392497769,
|
|
"calibration/batch_distribution_entropy": 0.6514151099566186,
|
|
"calibration/confidence_entropy": 0.34427881956673384,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.5258273923359426,
|
|
"calibration/mean_confidence": 0.79101776183208,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03828125,
|
|
"completions/max_length": 1505.8,
|
|
"completions/max_terminated_length": 1505.8,
|
|
"completions/mean_length": 204.794140625,
|
|
"completions/mean_terminated_length": 212.9551544189453,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 1.8,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.03670521453022957,
|
|
"learning_rate": 6.249999999999999e-07,
|
|
"loss": 0.0038,
|
|
"num_tokens": 34251493.0,
|
|
"reward": 0.5770156979560852,
|
|
"reward_std": 0.39822320342063905,
|
|
"rewards/accuracy_reward": 0.2083984375,
|
|
"rewards/brier_reward": 0.37617892026901245,
|
|
"rewards/confidence_uniqueness_reward": 0.5085799217224121,
|
|
"rewards/format_reward": 0.708984375,
|
|
"rewards/frontier_coverage_0": 0.298185932636261,
|
|
"rewards/frontier_coverage_1": 0.298185932636261,
|
|
"rewards/frontier_coverage_10": 0.298185932636261,
|
|
"rewards/frontier_coverage_15": 0.298185932636261,
|
|
"rewards/frontier_coverage_20": 0.298185932636261,
|
|
"rewards/frontier_coverage_25": 0.298185932636261,
|
|
"rewards/frontier_coverage_5": 0.298185932636261,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.22000732421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.2672633767127991,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.325,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.110003662109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.110003662109375,
|
|
"signal/advantage_abs_mean": 0.3318171322345734,
|
|
"signal/advantage_pre_scale_abs_mean": 0.3318171322345734,
|
|
"signal/advantage_pre_scale_std": 0.40630478858947755,
|
|
"signal/advantage_std": 0.40630478858947755,
|
|
"signal/brier_reward/centered_abs_mean": 0.3074793994426727,
|
|
"signal/brier_reward/group_std_mean": 0.3563279390335083,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030747941136360167,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.030747941136360167,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2858774721622467,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.3415905833244324,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02858774848282337,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02858774848282337,
|
|
"signal/format_reward/centered_abs_mean": 0.38082275390625,
|
|
"signal/format_reward/group_std_mean": 0.43958239555358886,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.190411376953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.190411376953125,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.28009108304977415,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.33603209257125854,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004005302442237735,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004005302442237735,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.28009108304977415,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.33603209257125854,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004005302442237735,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004005302442237735,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.28009108304977415,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.33603209257125854,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004005302442237735,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004005302442237735,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.28009108304977415,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.33603209257125854,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004005302442237735,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004005302442237735,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.28009108304977415,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.33603209257125854,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004005302442237735,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004005302442237735,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.28009108304977415,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.33603209257125854,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004005302442237735,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004005302442237735,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.28009108304977415,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.33603209257125854,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004005302442237735,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004005302442237735,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6155595065201233,
|
|
"calibration/batch_distribution_entropy": 0.6427073391342403,
|
|
"calibration/buffer_distribution_entropy": 0.6650037228066683,
|
|
"calibration/confidence_entropy": 0.34433009595302805,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.48542370452858075,
|
|
"calibration/mean_confidence": 0.801199768518267,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0216796875,
|
|
"completions/max_length": 1493.4,
|
|
"completions/max_terminated_length": 1493.4,
|
|
"completions/mean_length": 177.4697265625,
|
|
"completions/mean_terminated_length": 181.56385803222656,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.2,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.02985437400639057,
|
|
"learning_rate": 9.374999999999999e-07,
|
|
"loss": 0.0052,
|
|
"num_tokens": 51117519.0,
|
|
"reward": 0.7041961193084717,
|
|
"reward_std": 0.31099514067173006,
|
|
"rewards/accuracy_reward": 0.260546875,
|
|
"rewards/brier_reward": 0.47122411131858827,
|
|
"rewards/confidence_uniqueness_reward": 0.6331815242767334,
|
|
"rewards/format_reward": 0.866015625,
|
|
"rewards/frontier_coverage_0": 0.30443855822086335,
|
|
"rewards/frontier_coverage_1": 0.30443855822086335,
|
|
"rewards/frontier_coverage_10": 0.30443855822086335,
|
|
"rewards/frontier_coverage_15": 0.30443855822086335,
|
|
"rewards/frontier_coverage_20": 0.30443855822086335,
|
|
"rewards/frontier_coverage_25": 0.30443855822086335,
|
|
"rewards/frontier_coverage_5": 0.30443855822086335,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.20245361328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.25178754329681396,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.34375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.101226806640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.101226806640625,
|
|
"signal/advantage_abs_mean": 0.23251062631607056,
|
|
"signal/advantage_pre_scale_abs_mean": 0.23251062631607056,
|
|
"signal/advantage_pre_scale_std": 0.32085421681404114,
|
|
"signal/advantage_std": 0.32085421681404114,
|
|
"signal/brier_reward/centered_abs_mean": 0.27578999400138854,
|
|
"signal/brier_reward/group_std_mean": 0.32952038645744325,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02757900021970272,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02757900021970272,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.20353608727455139,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2688037037849426,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020353609696030617,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020353609696030617,
|
|
"signal/format_reward/centered_abs_mean": 0.2126708984375,
|
|
"signal/format_reward/group_std_mean": 0.30878249406814573,
|
|
"signal/format_reward/group_zero_std_frac": 0.071875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.10633544921875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.10633544921875,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.23975261449813842,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.29435974061489106,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0034284623805433513,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034284623805433513,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.23975261449813842,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.29435974061489106,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034284623805433513,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034284623805433513,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23975261449813842,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.29435974061489106,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034284623805433513,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034284623805433513,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.23975261449813842,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29435974061489106,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034284623805433513,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034284623805433513,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.23975261449813842,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.29435974061489106,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034284623805433513,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034284623805433513,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.23975261449813842,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.29435974061489106,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034284623805433513,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034284623805433513,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.23975261449813842,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.29435974061489106,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034284623805433513,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034284623805433513,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5353270245215466,
|
|
"calibration/batch_distribution_entropy": 0.6883228156852644,
|
|
"calibration/buffer_distribution_entropy": 0.6604163855659319,
|
|
"calibration/confidence_entropy": 0.3737941450349532,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3867639720734177,
|
|
"calibration/mean_confidence": 0.7860741133838542,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00537109375,
|
|
"completions/max_length": 1372.8,
|
|
"completions/max_terminated_length": 1372.8,
|
|
"completions/mean_length": 131.06396484375,
|
|
"completions/mean_terminated_length": 131.7907257080078,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 19.8,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.011706759221851826,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0026,
|
|
"num_tokens": 67378014.0,
|
|
"reward": 0.796250331401825,
|
|
"reward_std": 0.1896502822637558,
|
|
"rewards/accuracy_reward": 0.3400390625,
|
|
"rewards/brier_reward": 0.5660670876502991,
|
|
"rewards/confidence_uniqueness_reward": 0.7469664692878724,
|
|
"rewards/format_reward": 0.97880859375,
|
|
"rewards/frontier_coverage_0": 0.055176225304603574,
|
|
"rewards/frontier_coverage_1": 0.055176225304603574,
|
|
"rewards/frontier_coverage_10": 0.055176225304603574,
|
|
"rewards/frontier_coverage_15": 0.055176225304603574,
|
|
"rewards/frontier_coverage_20": 0.055176225304603574,
|
|
"rewards/frontier_coverage_25": 0.055176225304603574,
|
|
"rewards/frontier_coverage_5": 0.055176225304603574,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.20543212890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.2560299515724182,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.102716064453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.102716064453125,
|
|
"signal/advantage_abs_mean": 0.14002106040716172,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14002106040716172,
|
|
"signal/advantage_pre_scale_std": 0.20781327784061432,
|
|
"signal/advantage_std": 0.20781327784061432,
|
|
"signal/brier_reward/centered_abs_mean": 0.23954716920852662,
|
|
"signal/brier_reward/group_std_mean": 0.2948504090309143,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023954717069864274,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.023954717069864274,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.12043386697769165,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1569172501564026,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012043387070298195,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012043387070298195,
|
|
"signal/format_reward/centered_abs_mean": 0.039337158203125,
|
|
"signal/format_reward/group_std_mean": 0.09293515011668205,
|
|
"signal/format_reward/group_zero_std_frac": 0.546875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0196685791015625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0196685791015625,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.098221555352211,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1575959414243698,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0014045683201402426,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0014045683201402426,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.098221555352211,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1575959414243698,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0014045683201402426,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0014045683201402426,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.098221555352211,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1575959414243698,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0014045683201402426,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014045683201402426,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.098221555352211,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1575959414243698,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014045683201402426,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014045683201402426,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.098221555352211,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1575959414243698,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014045683201402426,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014045683201402426,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.098221555352211,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1575959414243698,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014045683201402426,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014045683201402426,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.098221555352211,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1575959414243698,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0014045683201402426,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014045683201402426,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6381643594481621,
|
|
"calibration/batch_distribution_entropy": 0.7804464216854378,
|
|
"calibration/buffer_distribution_entropy": 0.6838159309570526,
|
|
"calibration/confidence_entropy": 0.4490663154010008,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4193330378505376,
|
|
"calibration/mean_confidence": 0.7320443286313054,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001171875,
|
|
"completions/max_length": 711.4,
|
|
"completions/max_terminated_length": 711.4,
|
|
"completions/mean_length": 108.21484375,
|
|
"completions/mean_terminated_length": 108.34236907958984,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 22.0,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.07899007946252823,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0021,
|
|
"num_tokens": 83419286.0,
|
|
"reward": 0.8220680236816407,
|
|
"reward_std": 0.1564656525850296,
|
|
"rewards/accuracy_reward": 0.356640625,
|
|
"rewards/brier_reward": 0.6099278688430786,
|
|
"rewards/confidence_uniqueness_reward": 0.8057548403739929,
|
|
"rewards/format_reward": 0.99306640625,
|
|
"rewards/frontier_coverage_0": 0.05640597715973854,
|
|
"rewards/frontier_coverage_1": 0.05640597715973854,
|
|
"rewards/frontier_coverage_10": 0.05640597715973854,
|
|
"rewards/frontier_coverage_15": 0.05640597715973854,
|
|
"rewards/frontier_coverage_20": 0.05640597715973854,
|
|
"rewards/frontier_coverage_25": 0.05640597715973854,
|
|
"rewards/frontier_coverage_5": 0.05640597715973854,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.189013671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.23597990572452546,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.384375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0945068359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0945068359375,
|
|
"signal/advantage_abs_mean": 0.11983990371227264,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11983990371227264,
|
|
"signal/advantage_pre_scale_std": 0.1789884090423584,
|
|
"signal/advantage_std": 0.1789884090423584,
|
|
"signal/brier_reward/centered_abs_mean": 0.2191626399755478,
|
|
"signal/brier_reward/group_std_mean": 0.2707302927970886,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021916263923048972,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021916263923048972,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07767567187547683,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10581078231334687,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007767567411065102,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007767567411065102,
|
|
"signal/format_reward/centered_abs_mean": 0.013055419921875,
|
|
"signal/format_reward/group_std_mean": 0.0338589858263731,
|
|
"signal/format_reward/group_zero_std_frac": 0.821875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0065277099609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0065277099609375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.11382188200950623,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17087749242782593,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016276529058814049,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016276529058814049,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11382188200950623,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17087749242782593,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016276529058814049,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016276529058814049,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11382188200950623,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17087749242782593,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016276529058814049,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016276529058814049,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11382188200950623,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17087749242782593,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016276529058814049,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016276529058814049,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11382188200950623,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.17087749242782593,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016276529058814049,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016276529058814049,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11382188200950623,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.17087749242782593,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016276529058814049,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016276529058814049,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11382188200950623,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17087749242782593,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016276529058814049,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016276529058814049,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6310570415406989,
|
|
"calibration/batch_distribution_entropy": 0.8396163083060891,
|
|
"calibration/buffer_distribution_entropy": 0.72589944855154,
|
|
"calibration/confidence_entropy": 0.5324667987144341,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3369986010486203,
|
|
"calibration/mean_confidence": 0.6545671946414907,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0009765625,
|
|
"completions/max_length": 550.4,
|
|
"completions/max_terminated_length": 550.4,
|
|
"completions/mean_length": 109.701171875,
|
|
"completions/mean_terminated_length": 109.80919189453125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 37.6,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.004247599747031927,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0016,
|
|
"num_tokens": 99587234.0,
|
|
"reward": 0.8369772076606751,
|
|
"reward_std": 0.1397940844297409,
|
|
"rewards/accuracy_reward": 0.36689453125,
|
|
"rewards/brier_reward": 0.6584429621696473,
|
|
"rewards/confidence_uniqueness_reward": 0.8270732045173645,
|
|
"rewards/format_reward": 0.996875,
|
|
"rewards/frontier_coverage_0": 0.06534303873777389,
|
|
"rewards/frontier_coverage_1": 0.06534303873777389,
|
|
"rewards/frontier_coverage_10": 0.06534303873777389,
|
|
"rewards/frontier_coverage_15": 0.06534303873777389,
|
|
"rewards/frontier_coverage_20": 0.06534303873777389,
|
|
"rewards/frontier_coverage_25": 0.06534303873777389,
|
|
"rewards/frontier_coverage_5": 0.06534303873777389,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.176531982421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.2280410945415497,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.371875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0882659912109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0882659912109375,
|
|
"signal/advantage_abs_mean": 0.10705235004425048,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10705235004425048,
|
|
"signal/advantage_pre_scale_std": 0.16180138289928436,
|
|
"signal/advantage_std": 0.16180138289928436,
|
|
"signal/brier_reward/centered_abs_mean": 0.19467200934886933,
|
|
"signal/brier_reward/group_std_mean": 0.2417706161737442,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019467201083898544,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019467201083898544,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07996234148740769,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10100700855255126,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007996234111487865,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007996234111487865,
|
|
"signal/format_reward/centered_abs_mean": 0.00604248046875,
|
|
"signal/format_reward/group_std_mean": 0.017341360449790955,
|
|
"signal/format_reward/group_zero_std_frac": 0.903125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003021240234375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.003021240234375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14193402826786042,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20070194005966185,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020296565489843488,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020296565489843488,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14193402826786042,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20070194005966185,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020296565489843488,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020296565489843488,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14193402826786042,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20070194005966185,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020296565489843488,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020296565489843488,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14193402826786042,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20070194005966185,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020296565489843488,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020296565489843488,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.14193402826786042,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20070194005966185,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020296565489843488,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020296565489843488,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14193402826786042,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.20070194005966185,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020296565489843488,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020296565489843488,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14193402826786042,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20070194005966185,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020296565489843488,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020296565489843488,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4946849277525329,
|
|
"calibration/batch_distribution_entropy": 0.8813726157552029,
|
|
"calibration/buffer_distribution_entropy": 0.770196496343847,
|
|
"calibration/confidence_entropy": 0.5614673006037132,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.03561643835616438,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.2029153969087596,
|
|
"calibration/mean_confidence": 0.5835205516314776,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 381.4,
|
|
"completions/max_terminated_length": 381.4,
|
|
"completions/mean_length": 117.57978515625,
|
|
"completions/mean_terminated_length": 117.64874572753907,
|
|
"completions/min_length": 16.8,
|
|
"completions/min_terminated_length": 44.4,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.0023746925871819258,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0006,
|
|
"num_tokens": 115900723.0,
|
|
"reward": 0.867804765701294,
|
|
"reward_std": 0.12527745664119722,
|
|
"rewards/accuracy_reward": 0.41201171875,
|
|
"rewards/brier_reward": 0.711748468875885,
|
|
"rewards/confidence_uniqueness_reward": 0.8443502902984619,
|
|
"rewards/format_reward": 0.998046875,
|
|
"rewards/frontier_coverage_0": 0.07158430591225624,
|
|
"rewards/frontier_coverage_1": 0.07158430591225624,
|
|
"rewards/frontier_coverage_10": 0.07158430591225624,
|
|
"rewards/frontier_coverage_15": 0.07158430591225624,
|
|
"rewards/frontier_coverage_20": 0.07158430591225624,
|
|
"rewards/frontier_coverage_25": 0.07158430591225624,
|
|
"rewards/frontier_coverage_5": 0.07158430591225624,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.166949462890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.21374925673007966,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.415625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0834747314453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0834747314453125,
|
|
"signal/advantage_abs_mean": 0.09721089154481888,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09721089154481888,
|
|
"signal/advantage_pre_scale_std": 0.14640960693359376,
|
|
"signal/advantage_std": 0.14640960693359376,
|
|
"signal/brier_reward/centered_abs_mean": 0.17608677446842194,
|
|
"signal/brier_reward/group_std_mean": 0.2203920841217041,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017608677595853807,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017608677595853807,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08555223494768142,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10618715584278107,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008555223420262336,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008555223420262336,
|
|
"signal/format_reward/centered_abs_mean": 0.00377197265625,
|
|
"signal/format_reward/group_std_mean": 0.010712234629318118,
|
|
"signal/format_reward/group_zero_std_frac": 0.940625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001885986328125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.001885986328125,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18098629117012024,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23790799379348754,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002588103944435716,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002588103944435716,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18098629117012024,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23790799379348754,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002588103944435716,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002588103944435716,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18098629117012024,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23790799379348754,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002588103944435716,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002588103944435716,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18098629117012024,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23790799379348754,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002588103944435716,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002588103944435716,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18098629117012024,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23790799379348754,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002588103944435716,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002588103944435716,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18098629117012024,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23790799379348754,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002588103944435716,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002588103944435716,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18098629117012024,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23790799379348754,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002588103944435716,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002588103944435716,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5268772797001111,
|
|
"calibration/batch_distribution_entropy": 0.8795172673167941,
|
|
"calibration/buffer_distribution_entropy": 0.8186280886906422,
|
|
"calibration/confidence_entropy": 0.5856780630341514,
|
|
"calibration/coverage@0%": 0.001171875,
|
|
"calibration/coverage@1%": 0.001171875,
|
|
"calibration/coverage@10%": 0.001171875,
|
|
"calibration/coverage@15%": 0.001171875,
|
|
"calibration/coverage@20%": 0.001171875,
|
|
"calibration/coverage@25%": 0.01252140410958904,
|
|
"calibration/coverage@30%": 0.03831564946183953,
|
|
"calibration/coverage@5%": 0.001171875,
|
|
"calibration/ece": 0.14752713597429187,
|
|
"calibration/mean_confidence": 0.46553835813256955,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 673.8,
|
|
"completions/max_terminated_length": 673.8,
|
|
"completions/mean_length": 127.0328125,
|
|
"completions/mean_terminated_length": 127.10689544677734,
|
|
"completions/min_length": 23.2,
|
|
"completions/min_terminated_length": 54.2,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.0012905292678624392,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0013,
|
|
"num_tokens": 132118211.0,
|
|
"reward": 0.8764559626579285,
|
|
"reward_std": 0.10876117348670959,
|
|
"rewards/accuracy_reward": 0.41298828125,
|
|
"rewards/brier_reward": 0.7420265793800354,
|
|
"rewards/confidence_uniqueness_reward": 0.8668764352798461,
|
|
"rewards/format_reward": 0.99892578125,
|
|
"rewards/frontier_coverage_0": 0.09599030762910843,
|
|
"rewards/frontier_coverage_1": 0.09599030762910843,
|
|
"rewards/frontier_coverage_10": 0.09599030762910843,
|
|
"rewards/frontier_coverage_15": 0.09599030762910843,
|
|
"rewards/frontier_coverage_20": 0.09599030762910843,
|
|
"rewards/frontier_coverage_25": 0.09599030762910843,
|
|
"rewards/frontier_coverage_5": 0.09599030762910843,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.150067138671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.19439001083374025,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.45625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0750335693359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0750335693359375,
|
|
"signal/advantage_abs_mean": 0.08436928540468216,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08436928540468216,
|
|
"signal/advantage_pre_scale_std": 0.12733531445264817,
|
|
"signal/advantage_std": 0.12733531445264817,
|
|
"signal/brier_reward/centered_abs_mean": 0.16247932612895966,
|
|
"signal/brier_reward/group_std_mean": 0.20427174270153045,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016247932985424995,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016247932985424995,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0709274247288704,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08821647614240646,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0070927425287663935,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0070927425287663935,
|
|
"signal/format_reward/centered_abs_mean": 0.002081298828125,
|
|
"signal/format_reward/group_std_mean": 0.006076698657125235,
|
|
"signal/format_reward/group_zero_std_frac": 0.965625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21489879190921785,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.27094546556472776,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0030730527359992266,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030730527359992266,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21489879190921785,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27094546556472776,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030730527359992266,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030730527359992266,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21489879190921785,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27094546556472776,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030730527359992266,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030730527359992266,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21489879190921785,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27094546556472776,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030730527359992266,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030730527359992266,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21489879190921785,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27094546556472776,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030730527359992266,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030730527359992266,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21489879190921785,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27094546556472776,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030730527359992266,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030730527359992266,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21489879190921785,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27094546556472776,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030730527359992266,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030730527359992266,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3662413115512146,
|
|
"calibration/batch_distribution_entropy": 0.8725469191055767,
|
|
"calibration/buffer_distribution_entropy": 0.8648538408069202,
|
|
"calibration/confidence_entropy": 0.5608525866357722,
|
|
"calibration/coverage@0%": 0.0007820144324853229,
|
|
"calibration/coverage@1%": 0.0007820144324853229,
|
|
"calibration/coverage@10%": 0.014089255136986301,
|
|
"calibration/coverage@15%": 0.08962741560665362,
|
|
"calibration/coverage@20%": 0.14284491193737767,
|
|
"calibration/coverage@25%": 0.2508294092465753,
|
|
"calibration/coverage@30%": 0.2805558953033268,
|
|
"calibration/coverage@5%": 0.0007820144324853229,
|
|
"calibration/ece": 0.2382096857898642,
|
|
"calibration/mean_confidence": 0.38276720949307774,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 535.2,
|
|
"completions/max_terminated_length": 535.2,
|
|
"completions/mean_length": 134.48076171875,
|
|
"completions/mean_terminated_length": 134.49421081542968,
|
|
"completions/min_length": 42.6,
|
|
"completions/min_terminated_length": 53.6,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.0015246145194396377,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 148445726.0,
|
|
"reward": 0.9145649552345276,
|
|
"reward_std": 0.0982263907790184,
|
|
"rewards/accuracy_reward": 0.5052734375,
|
|
"rewards/brier_reward": 0.7265760660171509,
|
|
"rewards/confidence_uniqueness_reward": 0.8610557317733765,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_coverage_0": 0.03405772633850575,
|
|
"rewards/frontier_coverage_1": 0.03405772633850575,
|
|
"rewards/frontier_coverage_10": 0.03405772633850575,
|
|
"rewards/frontier_coverage_15": 0.03405772633850575,
|
|
"rewards/frontier_coverage_20": 0.03405772633850575,
|
|
"rewards/frontier_coverage_25": 0.03405772633850575,
|
|
"rewards/frontier_coverage_5": 0.03405772633850575,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14317626953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.192362380027771,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.446875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.071588134765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.071588134765625,
|
|
"signal/advantage_abs_mean": 0.07465749233961105,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07465749233961105,
|
|
"signal/advantage_pre_scale_std": 0.1136060506105423,
|
|
"signal/advantage_std": 0.1136060506105423,
|
|
"signal/brier_reward/centered_abs_mean": 0.1587459623813629,
|
|
"signal/brier_reward/group_std_mean": 0.1997540056705475,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015874596685171126,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015874596685171126,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08940582275390625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1131935566663742,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008940582629293203,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008940582629293203,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.23958866000175477,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.30230913162231443,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003426117729395628,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003426117729395628,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.23958866000175477,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.30230913162231443,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003426117729395628,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003426117729395628,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23958866000175477,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.30230913162231443,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003426117729395628,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003426117729395628,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.23958866000175477,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.30230913162231443,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003426117729395628,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003426117729395628,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.23958866000175477,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.30230913162231443,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003426117729395628,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003426117729395628,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.23958866000175477,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.30230913162231443,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003426117729395628,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003426117729395628,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.23958866000175477,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.30230913162231443,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003426117729395628,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003426117729395628,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.454046793538352,
|
|
"calibration/batch_distribution_entropy": 0.8398905686717141,
|
|
"calibration/buffer_distribution_entropy": 0.901052292739411,
|
|
"calibration/confidence_entropy": 0.545608272478401,
|
|
"calibration/coverage@0%": 0.0019546538649706457,
|
|
"calibration/coverage@1%": 0.0019546538649706457,
|
|
"calibration/coverage@10%": 0.009376528864970646,
|
|
"calibration/coverage@15%": 0.012892153864970645,
|
|
"calibration/coverage@20%": 0.016017153864970646,
|
|
"calibration/coverage@25%": 0.017189028864970646,
|
|
"calibration/coverage@30%": 0.03593902886497065,
|
|
"calibration/coverage@5%": 0.0019546538649706457,
|
|
"calibration/ece": 0.1533710485313065,
|
|
"calibration/mean_confidence": 0.3372224861869217,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 423.2,
|
|
"completions/max_terminated_length": 423.2,
|
|
"completions/mean_length": 141.1029296875,
|
|
"completions/mean_terminated_length": 141.13076171875,
|
|
"completions/min_length": 36.0,
|
|
"completions/min_terminated_length": 59.6,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.0012460550060495734,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0003,
|
|
"num_tokens": 164911548.0,
|
|
"reward": 0.8956733822822571,
|
|
"reward_std": 0.09705854654312134,
|
|
"rewards/accuracy_reward": 0.45341796875,
|
|
"rewards/brier_reward": 0.7379406452178955,
|
|
"rewards/confidence_uniqueness_reward": 0.8653342247009277,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_coverage_0": 0.08823383674025535,
|
|
"rewards/frontier_coverage_1": 0.08823383674025535,
|
|
"rewards/frontier_coverage_10": 0.08823383674025535,
|
|
"rewards/frontier_coverage_15": 0.08823383674025535,
|
|
"rewards/frontier_coverage_20": 0.08823383674025535,
|
|
"rewards/frontier_coverage_25": 0.08823383674025535,
|
|
"rewards/frontier_coverage_5": 0.08823383674025535,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.150860595703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.19206807315349578,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.475,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0754302978515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0754302978515625,
|
|
"signal/advantage_abs_mean": 0.07680515646934509,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07680515646934509,
|
|
"signal/advantage_pre_scale_std": 0.11379086673259735,
|
|
"signal/advantage_std": 0.11379086673259735,
|
|
"signal/brier_reward/centered_abs_mean": 0.15479380786418914,
|
|
"signal/brier_reward/group_std_mean": 0.1973109394311905,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015479381382465362,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015479381382465362,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09476796388626099,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.11929279714822769,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009476796537637711,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009476796537637711,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2523395955562592,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.31533271074295044,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0036084561608731745,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036084561608731745,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2523395955562592,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.31533271074295044,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036084561608731745,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036084561608731745,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2523395955562592,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.31533271074295044,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036084561608731745,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036084561608731745,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2523395955562592,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.31533271074295044,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036084561608731745,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036084561608731745,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2523395955562592,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.31533271074295044,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036084561608731745,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036084561608731745,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2523395955562592,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.31533271074295044,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036084561608731745,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036084561608731745,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2523395955562592,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.31533271074295044,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036084561608731745,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036084561608731745,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"eval_calibration/aurc": 0.5985907180934281,
|
|
"eval_calibration/batch_distribution_entropy": 0.7980443905263197,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9172991735258191,
|
|
"eval_calibration/confidence_entropy": 0.5452766110964884,
|
|
"eval_calibration/coverage@0%": 0.015625,
|
|
"eval_calibration/coverage@1%": 0.015625,
|
|
"eval_calibration/coverage@10%": 0.015625,
|
|
"eval_calibration/coverage@15%": 0.015625,
|
|
"eval_calibration/coverage@20%": 0.0546875,
|
|
"eval_calibration/coverage@25%": 0.078125,
|
|
"eval_calibration/coverage@30%": 0.0859375,
|
|
"eval_calibration/coverage@5%": 0.015625,
|
|
"eval_calibration/ece": 0.20335937500000004,
|
|
"eval_calibration/mean_confidence": 0.359296875,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 284.75,
|
|
"eval_completions/max_terminated_length": 284.75,
|
|
"eval_completions/mean_length": 144.8631477355957,
|
|
"eval_completions/mean_terminated_length": 144.8631477355957,
|
|
"eval_completions/min_length": 74.0,
|
|
"eval_completions/min_terminated_length": 74.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 164911548.0,
|
|
"eval_reward": 0.8594861328601837,
|
|
"eval_reward_std": 0.1926819011569023,
|
|
"eval_rewards/accuracy_reward": 0.373046875,
|
|
"eval_rewards/brier_reward": 0.7505200058221817,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.833251953125,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_coverage_0": 0.1457089427858591,
|
|
"eval_rewards/frontier_coverage_1": 0.1457089427858591,
|
|
"eval_rewards/frontier_coverage_10": 0.1457089427858591,
|
|
"eval_rewards/frontier_coverage_15": 0.1457089427858591,
|
|
"eval_rewards/frontier_coverage_20": 0.1457089427858591,
|
|
"eval_rewards/frontier_coverage_25": 0.1457089427858591,
|
|
"eval_rewards/frontier_coverage_5": 0.1457089427858591,
|
|
"eval_runtime": 15.9676,
|
|
"eval_samples_per_second": 31.313,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4466552734375,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4786013886332512,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22332763671875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22332763671875,
|
|
"eval_signal/advantage_abs_mean": 0.17472263425588608,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.17472263425588608,
|
|
"eval_signal/advantage_pre_scale_std": 0.19090154394507408,
|
|
"eval_signal/advantage_std": 0.19090154394507408,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.19750789552927017,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2354220263659954,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019750789739191532,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019750789739191532,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0953521728515625,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1108260452747345,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009535217541269958,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009535217541269958,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.4024868533015251,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.473217248916626,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0057555618695914745,
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0057555618695914745,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.4024868533015251,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.473217248916626,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0057555618695914745,
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0057555618695914745,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.4024868533015251,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.473217248916626,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0057555618695914745,
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0057555618695914745,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.4024868533015251,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.473217248916626,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0057555618695914745,
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0057555618695914745,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.4024868533015251,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.473217248916626,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0057555618695914745,
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0057555618695914745,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.4024868533015251,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.473217248916626,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0057555618695914745,
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0057555618695914745,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.4024868533015251,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.473217248916626,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0057555618695914745,
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0057555618695914745,
|
|
"eval_steps_per_second": 0.251,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4537260570762408,
|
|
"calibration/batch_distribution_entropy": 0.9062964846595729,
|
|
"calibration/buffer_distribution_entropy": 0.924649477340763,
|
|
"calibration/confidence_entropy": 0.542944820963166,
|
|
"calibration/coverage@0%": 0.0015625,
|
|
"calibration/coverage@1%": 0.0015625,
|
|
"calibration/coverage@10%": 0.0015625,
|
|
"calibration/coverage@15%": 0.0015625,
|
|
"calibration/coverage@20%": 0.0078125,
|
|
"calibration/coverage@25%": 0.022265625,
|
|
"calibration/coverage@30%": 0.115234375,
|
|
"calibration/coverage@5%": 0.0015625,
|
|
"calibration/ece": 0.18426962719962278,
|
|
"calibration/mean_confidence": 0.3850698915774798,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 452.8,
|
|
"completions/max_terminated_length": 452.8,
|
|
"completions/mean_length": 149.6189453125,
|
|
"completions/mean_terminated_length": 149.6783416748047,
|
|
"completions/min_length": 39.6,
|
|
"completions/min_terminated_length": 66.0,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.0017046101856976748,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 181680766.0,
|
|
"reward": 0.8976470112800599,
|
|
"reward_std": 0.09877448678016662,
|
|
"rewards/accuracy_reward": 0.44765625,
|
|
"rewards/brier_reward": 0.7434515833854676,
|
|
"rewards/confidence_uniqueness_reward": 0.893541157245636,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_coverage_0": 0.10353371798992157,
|
|
"rewards/frontier_coverage_1": 0.10353371798992157,
|
|
"rewards/frontier_coverage_10": 0.10353371798992157,
|
|
"rewards/frontier_coverage_15": 0.10353371798992157,
|
|
"rewards/frontier_coverage_20": 0.10353371798992157,
|
|
"rewards/frontier_coverage_25": 0.10353371798992157,
|
|
"rewards/frontier_coverage_5": 0.10353371798992157,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15311279296875,
|
|
"signal/accuracy_reward/group_std_mean": 0.19436658024787903,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.475,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.076556396484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.076556396484375,
|
|
"signal/advantage_abs_mean": 0.07793679982423782,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07793679982423782,
|
|
"signal/advantage_pre_scale_std": 0.11506912857294083,
|
|
"signal/advantage_std": 0.11506912857294083,
|
|
"signal/brier_reward/centered_abs_mean": 0.16262381374835969,
|
|
"signal/brier_reward/group_std_mean": 0.20505278408527375,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016262382455170154,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016262382455170154,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06747991964221,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08725375980138779,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006747992150485516,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006747992150485516,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.002762135770171881,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.25853300988674166,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.320653623342514,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003697022097185254,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003697022097185254,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.25853300988674166,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.320653623342514,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003697022097185254,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003697022097185254,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.25853300988674166,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.320653623342514,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003697022097185254,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003697022097185254,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.25853300988674166,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.320653623342514,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003697022097185254,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003697022097185254,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.25853300988674166,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.320653623342514,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003697022097185254,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003697022097185254,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.25853300988674166,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.320653623342514,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003697022097185254,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003697022097185254,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.25853300988674166,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.320653623342514,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003697022097185254,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003697022097185254,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3573644592066947,
|
|
"calibration/batch_distribution_entropy": 0.9555272460757454,
|
|
"calibration/buffer_distribution_entropy": 0.9379707462670741,
|
|
"calibration/confidence_entropy": 0.5269331213271102,
|
|
"calibration/coverage@0%": 0.0015640288649706457,
|
|
"calibration/coverage@1%": 0.0015640288649706457,
|
|
"calibration/coverage@10%": 0.010565985812133073,
|
|
"calibration/coverage@15%": 0.01721960616438356,
|
|
"calibration/coverage@20%": 0.10710387108610568,
|
|
"calibration/coverage@25%": 0.18689151174168295,
|
|
"calibration/coverage@30%": 0.2851256727005871,
|
|
"calibration/coverage@5%": 0.010565985812133073,
|
|
"calibration/ece": 0.11475980011122759,
|
|
"calibration/mean_confidence": 0.4282593191460644,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 440.2,
|
|
"completions/max_terminated_length": 440.2,
|
|
"completions/mean_length": 157.43359375,
|
|
"completions/mean_terminated_length": 157.4958282470703,
|
|
"completions/min_length": 41.4,
|
|
"completions/min_terminated_length": 65.8,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.0011922204867005348,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 198107702.0,
|
|
"reward": 0.9124896407127381,
|
|
"reward_std": 0.10086087137460709,
|
|
"rewards/accuracy_reward": 0.47177734375,
|
|
"rewards/brier_reward": 0.7537966251373291,
|
|
"rewards/confidence_uniqueness_reward": 0.9138024330139161,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_coverage_0": 0.10172683596611024,
|
|
"rewards/frontier_coverage_1": 0.10172683596611024,
|
|
"rewards/frontier_coverage_10": 0.10172683596611024,
|
|
"rewards/frontier_coverage_15": 0.10172683596611024,
|
|
"rewards/frontier_coverage_20": 0.10172683596611024,
|
|
"rewards/frontier_coverage_25": 0.10172683596611024,
|
|
"rewards/frontier_coverage_5": 0.10172683596611024,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.146429443359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1905221551656723,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.46875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0732147216796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0732147216796875,
|
|
"signal/advantage_abs_mean": 0.0779627725481987,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0779627725481987,
|
|
"signal/advantage_pre_scale_std": 0.1176736056804657,
|
|
"signal/advantage_std": 0.1176736056804657,
|
|
"signal/brier_reward/centered_abs_mean": 0.1688424438238144,
|
|
"signal/brier_reward/group_std_mean": 0.2119818925857544,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0168842451646924,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0168842451646924,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05015767216682434,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06539249867200851,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005015767458826303,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005015767458826303,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_std_mean": 0.003866990143433213,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.24883936941623688,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.31324060559272765,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003558403067290783,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003558403067290783,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.24883936941623688,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.31324060559272765,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003558403067290783,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003558403067290783,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.24883936941623688,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.31324060559272765,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003558403067290783,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003558403067290783,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.24883936941623688,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.31324060559272765,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003558403067290783,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003558403067290783,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.24883936941623688,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.31324060559272765,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003558403067290783,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003558403067290783,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.24883936941623688,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.31324060559272765,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003558403067290783,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003558403067290783,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.24883936941623688,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.31324060559272765,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003558403067290783,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003558403067290783,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30967251532063694,
|
|
"calibration/batch_distribution_entropy": 0.9789505653815354,
|
|
"calibration/buffer_distribution_entropy": 0.9463065200388749,
|
|
"calibration/confidence_entropy": 0.5029713373170281,
|
|
"calibration/coverage@0%": 0.015625,
|
|
"calibration/coverage@1%": 0.015625,
|
|
"calibration/coverage@10%": 0.075,
|
|
"calibration/coverage@15%": 0.179296875,
|
|
"calibration/coverage@20%": 0.304296875,
|
|
"calibration/coverage@25%": 0.48671875,
|
|
"calibration/coverage@30%": 0.541796875,
|
|
"calibration/coverage@5%": 0.015625,
|
|
"calibration/ece": 0.1678404179115396,
|
|
"calibration/mean_confidence": 0.4842072225556321,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 428.2,
|
|
"completions/max_terminated_length": 428.2,
|
|
"completions/mean_length": 165.9119140625,
|
|
"completions/mean_terminated_length": 165.9119140625,
|
|
"completions/min_length": 74.8,
|
|
"completions/min_terminated_length": 74.8,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.001235796487890184,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 214838864.0,
|
|
"reward": 0.9380666136741638,
|
|
"reward_std": 0.10160990357398987,
|
|
"rewards/accuracy_reward": 0.52392578125,
|
|
"rewards/brier_reward": 0.7590964794158935,
|
|
"rewards/confidence_uniqueness_reward": 0.9277396678924561,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_coverage_0": 0.07510250061750412,
|
|
"rewards/frontier_coverage_1": 0.07510250061750412,
|
|
"rewards/frontier_coverage_10": 0.07510250061750412,
|
|
"rewards/frontier_coverage_15": 0.07510250061750412,
|
|
"rewards/frontier_coverage_20": 0.07510250061750412,
|
|
"rewards/frontier_coverage_25": 0.07510250061750412,
|
|
"rewards/frontier_coverage_5": 0.07510250061750412,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.139874267578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1816681444644928,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0699371337890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0699371337890625,
|
|
"signal/advantage_abs_mean": 0.0794785276055336,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0794785276055336,
|
|
"signal/advantage_pre_scale_std": 0.12093794941902161,
|
|
"signal/advantage_std": 0.12093794941902161,
|
|
"signal/brier_reward/centered_abs_mean": 0.17095426023006438,
|
|
"signal/brier_reward/group_std_mean": 0.21720809936523439,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017095426470041274,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017095426470041274,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.039772205799818036,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04772929325699806,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003977220831438899,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003977220831438899,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.229102823138237,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.29095078706741334,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003276170324534178,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003276170324534178,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.229102823138237,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.29095078706741334,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003276170324534178,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003276170324534178,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.229102823138237,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.29095078706741334,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003276170324534178,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003276170324534178,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.229102823138237,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29095078706741334,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003276170324534178,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003276170324534178,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.229102823138237,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.29095078706741334,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003276170324534178,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003276170324534178,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.229102823138237,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.29095078706741334,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003276170324534178,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003276170324534178,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.229102823138237,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.29095078706741334,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003276170324534178,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003276170324534178,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3461947025366329,
|
|
"calibration/batch_distribution_entropy": 0.9871950056652427,
|
|
"calibration/buffer_distribution_entropy": 0.9528396482217868,
|
|
"calibration/confidence_entropy": 0.47853435639134895,
|
|
"calibration/coverage@0%": 0.00390625,
|
|
"calibration/coverage@1%": 0.00390625,
|
|
"calibration/coverage@10%": 0.026998226516634048,
|
|
"calibration/coverage@15%": 0.1576489114481409,
|
|
"calibration/coverage@20%": 0.2221891817514677,
|
|
"calibration/coverage@25%": 0.3430818860078278,
|
|
"calibration/coverage@30%": 0.44168909001956946,
|
|
"calibration/coverage@5%": 0.00390625,
|
|
"calibration/ece": 0.1482485175463591,
|
|
"calibration/mean_confidence": 0.4928267662792173,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00068359375,
|
|
"completions/max_length": 458.4,
|
|
"completions/max_terminated_length": 458.4,
|
|
"completions/mean_length": 169.5767578125,
|
|
"completions/mean_terminated_length": 169.69363708496093,
|
|
"completions/min_length": 14.6,
|
|
"completions/min_terminated_length": 73.2,
|
|
"epoch": 0.224,
|
|
"grad_norm": 0.0013179010711610317,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 231728514.0,
|
|
"reward": 0.9151730179786682,
|
|
"reward_std": 0.10305744558572769,
|
|
"rewards/accuracy_reward": 0.4703125,
|
|
"rewards/brier_reward": 0.7591888546943665,
|
|
"rewards/confidence_uniqueness_reward": 0.9291624546051025,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_coverage_0": 0.11560697555541992,
|
|
"rewards/frontier_coverage_1": 0.11560697555541992,
|
|
"rewards/frontier_coverage_10": 0.11560697555541992,
|
|
"rewards/frontier_coverage_15": 0.11560697555541992,
|
|
"rewards/frontier_coverage_20": 0.11560697555541992,
|
|
"rewards/frontier_coverage_25": 0.11560697555541992,
|
|
"rewards/frontier_coverage_5": 0.11560697555541992,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12928466796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.17319436967372895,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.496875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.064642333984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.064642333984375,
|
|
"signal/advantage_abs_mean": 0.07788805365562439,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07788805365562439,
|
|
"signal/advantage_pre_scale_std": 0.12274336367845536,
|
|
"signal/advantage_std": 0.12274336367845536,
|
|
"signal/brier_reward/centered_abs_mean": 0.1776938557624817,
|
|
"signal/brier_reward/group_std_mean": 0.22498490810394287,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01776938550174236,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01776938550174236,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04329204186797142,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05236217975616455,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004329204373061657,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004329204373061657,
|
|
"signal/format_reward/centered_abs_mean": 0.00150146484375,
|
|
"signal/format_reward/group_std_mean": 0.004083108901977539,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000750732421875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000750732421875,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2169666290283203,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2803891360759735,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0031026228331029414,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031026228331029414,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2169666290283203,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2803891360759735,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031026228331029414,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031026228331029414,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2169666290283203,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2803891360759735,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031026228331029414,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031026228331029414,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2169666290283203,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2803891360759735,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031026228331029414,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031026228331029414,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2169666290283203,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2803891360759735,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031026228331029414,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031026228331029414,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2169666290283203,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2803891360759735,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031026228331029414,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031026228331029414,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2169666290283203,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2803891360759735,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031026228331029414,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031026228331029414,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3850509745798207,
|
|
"calibration/batch_distribution_entropy": 0.9636978993198815,
|
|
"calibration/buffer_distribution_entropy": 0.9581510600414204,
|
|
"calibration/confidence_entropy": 0.4726358455065284,
|
|
"calibration/coverage@0%": 0.01171875,
|
|
"calibration/coverage@1%": 0.01171875,
|
|
"calibration/coverage@10%": 0.128125,
|
|
"calibration/coverage@15%": 0.19259112035225048,
|
|
"calibration/coverage@20%": 0.21604696673189822,
|
|
"calibration/coverage@25%": 0.23679060665362034,
|
|
"calibration/coverage@30%": 0.24187866927592952,
|
|
"calibration/coverage@5%": 0.07578125,
|
|
"calibration/ece": 0.18973087646757542,
|
|
"calibration/mean_confidence": 0.5367774832772495,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 453.8,
|
|
"completions/max_terminated_length": 453.8,
|
|
"completions/mean_length": 174.0443359375,
|
|
"completions/mean_terminated_length": 174.1128356933594,
|
|
"completions/min_length": 47.4,
|
|
"completions/min_terminated_length": 79.0,
|
|
"epoch": 0.24,
|
|
"grad_norm": 0.0013802044559270144,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0005,
|
|
"num_tokens": 248762408.0,
|
|
"reward": 0.944013798236847,
|
|
"reward_std": 0.11029932498931885,
|
|
"rewards/accuracy_reward": 0.53603515625,
|
|
"rewards/brier_reward": 0.7548027634620667,
|
|
"rewards/confidence_uniqueness_reward": 0.934007465839386,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_coverage_0": 0.07400779929012061,
|
|
"rewards/frontier_coverage_1": 0.07400779929012061,
|
|
"rewards/frontier_coverage_10": 0.07400779929012061,
|
|
"rewards/frontier_coverage_15": 0.07400779929012061,
|
|
"rewards/frontier_coverage_20": 0.07400779929012061,
|
|
"rewards/frontier_coverage_25": 0.07400779929012061,
|
|
"rewards/frontier_coverage_5": 0.07400779929012061,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.148919677734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.19519998431205748,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.45,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0744598388671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0744598388671875,
|
|
"signal/advantage_abs_mean": 0.08497563004493713,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08497563004493713,
|
|
"signal/advantage_pre_scale_std": 0.13136209100484847,
|
|
"signal/advantage_std": 0.13136209100484847,
|
|
"signal/brier_reward/centered_abs_mean": 0.1855131357908249,
|
|
"signal/brier_reward/group_std_mean": 0.2328798860311508,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018551314249634743,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018551314249634743,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04416573867201805,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.053702594339847566,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004416573978960514,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004416573978960514,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_std_mean": 0.0033145629335194827,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21642023622989653,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.283210289478302,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0030948093626648188,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030948093626648188,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21642023622989653,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.283210289478302,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030948093626648188,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030948093626648188,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21642023622989653,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.283210289478302,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030948093626648188,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030948093626648188,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21642023622989653,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.283210289478302,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030948093626648188,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030948093626648188,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21642023622989653,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.283210289478302,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030948093626648188,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030948093626648188,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21642023622989653,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.283210289478302,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030948093626648188,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030948093626648188,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21642023622989653,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.283210289478302,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030948093626648188,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030948093626648188,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.309607338599339,
|
|
"calibration/batch_distribution_entropy": 0.9670440695097444,
|
|
"calibration/buffer_distribution_entropy": 0.9620481829535976,
|
|
"calibration/confidence_entropy": 0.4398015262158295,
|
|
"calibration/coverage@0%": 0.004307598039215686,
|
|
"calibration/coverage@1%": 0.004307598039215686,
|
|
"calibration/coverage@10%": 0.09665747549019607,
|
|
"calibration/coverage@15%": 0.237280943627451,
|
|
"calibration/coverage@20%": 0.34340226715686273,
|
|
"calibration/coverage@25%": 0.4869592524509804,
|
|
"calibration/coverage@30%": 0.6019806985294117,
|
|
"calibration/coverage@5%": 0.00940563725490196,
|
|
"calibration/ece": 0.12086222182290382,
|
|
"calibration/mean_confidence": 0.5231612283193191,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00087890625,
|
|
"completions/max_length": 597.2,
|
|
"completions/max_terminated_length": 597.2,
|
|
"completions/mean_length": 172.6607421875,
|
|
"completions/mean_terminated_length": 172.8123809814453,
|
|
"completions/min_length": 14.2,
|
|
"completions/min_terminated_length": 78.0,
|
|
"epoch": 0.256,
|
|
"grad_norm": 0.0012472033267840743,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0006,
|
|
"num_tokens": 265585270.0,
|
|
"reward": 0.933080542087555,
|
|
"reward_std": 0.10499893128871918,
|
|
"rewards/accuracy_reward": 0.50439453125,
|
|
"rewards/brier_reward": 0.7625807642936706,
|
|
"rewards/confidence_uniqueness_reward": 0.9408350467681885,
|
|
"rewards/format_reward": 0.99873046875,
|
|
"rewards/frontier_coverage_0": 0.11165271587669849,
|
|
"rewards/frontier_coverage_1": 0.11165271587669849,
|
|
"rewards/frontier_coverage_10": 0.11165271587669849,
|
|
"rewards/frontier_coverage_15": 0.11165271587669849,
|
|
"rewards/frontier_coverage_20": 0.11165271587669849,
|
|
"rewards/frontier_coverage_25": 0.11165271587669849,
|
|
"rewards/frontier_coverage_5": 0.11165271587669849,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.141839599609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.18213264644145966,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.490625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0709197998046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0709197998046875,
|
|
"signal/advantage_abs_mean": 0.08127593994140625,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08127593994140625,
|
|
"signal/advantage_pre_scale_std": 0.12862132340669633,
|
|
"signal/advantage_std": 0.12862132340669633,
|
|
"signal/brier_reward/centered_abs_mean": 0.17898644208908082,
|
|
"signal/brier_reward/group_std_mean": 0.22654514908790588,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017898644879460336,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017898644879460336,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.039608802646398544,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05151473581790924,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003960880218073726,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003960880218073726,
|
|
"signal/format_reward/centered_abs_mean": 0.002459716796875,
|
|
"signal/format_reward/group_std_mean": 0.007181553030386567,
|
|
"signal/format_reward/group_zero_std_frac": 0.959375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012298583984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0012298583984375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21713967025279998,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2821187674999237,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00310509717091918,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00310509717091918,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21713967025279998,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2821187674999237,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00310509717091918,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00310509717091918,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21713967025279998,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2821187674999237,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00310509717091918,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00310509717091918,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21713967025279998,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2821187674999237,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00310509717091918,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00310509717091918,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21713967025279998,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2821187674999237,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00310509717091918,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00310509717091918,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21713967025279998,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2821187674999237,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00310509717091918,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00310509717091918,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21713967025279998,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2821187674999237,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00310509717091918,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00310509717091918,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3896367144097038,
|
|
"calibration/batch_distribution_entropy": 0.9838301207068794,
|
|
"calibration/buffer_distribution_entropy": 0.9661401903129894,
|
|
"calibration/confidence_entropy": 0.46300136102847683,
|
|
"calibration/coverage@0%": 0.008991254892367906,
|
|
"calibration/coverage@1%": 0.008991254892367906,
|
|
"calibration/coverage@10%": 0.0543068126223092,
|
|
"calibration/coverage@15%": 0.10274431262230918,
|
|
"calibration/coverage@20%": 0.1363380626223092,
|
|
"calibration/coverage@25%": 0.2075288955479452,
|
|
"calibration/coverage@30%": 0.2786868578767123,
|
|
"calibration/coverage@5%": 0.026960004892367904,
|
|
"calibration/ece": 0.1511877769353029,
|
|
"calibration/mean_confidence": 0.4922436817696364,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 424.2,
|
|
"completions/max_terminated_length": 424.2,
|
|
"completions/mean_length": 180.82041015625,
|
|
"completions/mean_terminated_length": 180.90801086425782,
|
|
"completions/min_length": 17.2,
|
|
"completions/min_terminated_length": 82.6,
|
|
"epoch": 0.272,
|
|
"grad_norm": 0.0010296551045030355,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0003,
|
|
"num_tokens": 282402567.0,
|
|
"reward": 0.9305618286132813,
|
|
"reward_std": 0.09933947324752808,
|
|
"rewards/accuracy_reward": 0.49638671875,
|
|
"rewards/brier_reward": 0.7602882385253906,
|
|
"rewards/confidence_uniqueness_reward": 0.9493825793266296,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_coverage_0": 0.11731471288949251,
|
|
"rewards/frontier_coverage_1": 0.11731471288949251,
|
|
"rewards/frontier_coverage_10": 0.11731471288949251,
|
|
"rewards/frontier_coverage_15": 0.11731471288949251,
|
|
"rewards/frontier_coverage_20": 0.11731471288949251,
|
|
"rewards/frontier_coverage_25": 0.11731471288949251,
|
|
"rewards/frontier_coverage_5": 0.11731471288949251,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.131195068359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.16806706488132478,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0655975341796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0655975341796875,
|
|
"signal/advantage_abs_mean": 0.0770923689007759,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0770923689007759,
|
|
"signal/advantage_pre_scale_std": 0.12059997916221618,
|
|
"signal/advantage_std": 0.12059997916221618,
|
|
"signal/brier_reward/centered_abs_mean": 0.18005068302154542,
|
|
"signal/brier_reward/group_std_mean": 0.2274363726377487,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018005067855119704,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018005067855119704,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029679254069924353,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03936323225498199,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029679253231734036,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029679253231734036,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_std_mean": 0.003866990143433213,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2299924910068512,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.29544530510902406,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0032888925168663265,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0032888925168663265,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2299924910068512,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.29544530510902406,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032888925168663265,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032888925168663265,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2299924910068512,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.29544530510902406,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032888925168663265,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032888925168663265,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2299924910068512,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29544530510902406,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032888925168663265,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032888925168663265,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2299924910068512,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.29544530510902406,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032888925168663265,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032888925168663265,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2299924910068512,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.29544530510902406,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032888925168663265,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032888925168663265,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2299924910068512,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.29544530510902406,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032888925168663265,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032888925168663265,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.33589424683068303,
|
|
"calibration/batch_distribution_entropy": 0.9830515504914701,
|
|
"calibration/buffer_distribution_entropy": 0.9701992389099413,
|
|
"calibration/confidence_entropy": 0.46065174492897754,
|
|
"calibration/coverage@0%": 0.00234375,
|
|
"calibration/coverage@1%": 0.00234375,
|
|
"calibration/coverage@10%": 0.05237898284313726,
|
|
"calibration/coverage@15%": 0.10512558876290243,
|
|
"calibration/coverage@20%": 0.13598649262787305,
|
|
"calibration/coverage@25%": 0.2674431951719044,
|
|
"calibration/coverage@30%": 0.37270895087487055,
|
|
"calibration/coverage@5%": 0.011363357843137255,
|
|
"calibration/ece": 0.13230197746702937,
|
|
"calibration/mean_confidence": 0.5004645331904743,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00068359375,
|
|
"completions/max_length": 453.6,
|
|
"completions/max_terminated_length": 453.6,
|
|
"completions/mean_length": 177.79169921875,
|
|
"completions/mean_terminated_length": 177.91405029296874,
|
|
"completions/min_length": 31.0,
|
|
"completions/min_terminated_length": 80.6,
|
|
"epoch": 0.288,
|
|
"grad_norm": 0.0011328975670039654,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0006,
|
|
"num_tokens": 299181330.0,
|
|
"reward": 0.9354893803596497,
|
|
"reward_std": 0.10109454691410065,
|
|
"rewards/accuracy_reward": 0.50869140625,
|
|
"rewards/brier_reward": 0.7558692455291748,
|
|
"rewards/confidence_uniqueness_reward": 0.9514668345451355,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_coverage_0": 0.10887458622455597,
|
|
"rewards/frontier_coverage_1": 0.10887458622455597,
|
|
"rewards/frontier_coverage_10": 0.10887458622455597,
|
|
"rewards/frontier_coverage_15": 0.10887458622455597,
|
|
"rewards/frontier_coverage_20": 0.10887458622455597,
|
|
"rewards/frontier_coverage_25": 0.10887458622455597,
|
|
"rewards/frontier_coverage_5": 0.10887458622455597,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.139923095703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1817190706729889,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.490625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0699615478515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0699615478515625,
|
|
"signal/advantage_abs_mean": 0.07745532691478729,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07745532691478729,
|
|
"signal/advantage_pre_scale_std": 0.12147206962108612,
|
|
"signal/advantage_std": 0.12147206962108612,
|
|
"signal/brier_reward/centered_abs_mean": 0.1812896490097046,
|
|
"signal/brier_reward/group_std_mean": 0.2296443372964859,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01812896430492401,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01812896430492401,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027077151462435722,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03699265941977501,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027077150996774437,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027077150996774437,
|
|
"signal/format_reward/centered_abs_mean": 0.00189208984375,
|
|
"signal/format_reward/group_std_mean": 0.005524271540343762,
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2429211437702179,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.30920409560203554,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0034737725276499988,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034737725276499988,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2429211437702179,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.30920409560203554,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034737725276499988,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034737725276499988,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2429211437702179,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.30920409560203554,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034737725276499988,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034737725276499988,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2429211437702179,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.30920409560203554,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034737725276499988,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034737725276499988,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2429211437702179,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.30920409560203554,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034737725276499988,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034737725276499988,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2429211437702179,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.30920409560203554,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034737725276499988,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034737725276499988,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2429211437702179,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.30920409560203554,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034737725276499988,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034737725276499988,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3082216996890913,
|
|
"calibration/batch_distribution_entropy": 0.9745983733554165,
|
|
"calibration/buffer_distribution_entropy": 0.9733239639462091,
|
|
"calibration/confidence_entropy": 0.4559858287117121,
|
|
"calibration/coverage@0%": 0.006652879901960784,
|
|
"calibration/coverage@1%": 0.006652879901960784,
|
|
"calibration/coverage@10%": 0.09274963367196194,
|
|
"calibration/coverage@15%": 0.1577411679569088,
|
|
"calibration/coverage@20%": 0.26820531037757567,
|
|
"calibration/coverage@25%": 0.3374852539474694,
|
|
"calibration/coverage@30%": 0.526755844461072,
|
|
"calibration/coverage@5%": 0.03674938725490196,
|
|
"calibration/ece": 0.14556641294167272,
|
|
"calibration/mean_confidence": 0.4999809672027273,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 588.4,
|
|
"completions/max_terminated_length": 588.4,
|
|
"completions/mean_length": 183.04345703125,
|
|
"completions/mean_terminated_length": 183.18773193359374,
|
|
"completions/min_length": 15.8,
|
|
"completions/min_terminated_length": 85.8,
|
|
"epoch": 0.304,
|
|
"grad_norm": 0.0011761499335989356,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0003,
|
|
"num_tokens": 315985647.0,
|
|
"reward": 0.9316917419433594,
|
|
"reward_std": 0.09645482301712036,
|
|
"rewards/accuracy_reward": 0.50341796875,
|
|
"rewards/brier_reward": 0.7473922848701477,
|
|
"rewards/confidence_uniqueness_reward": 0.9497161388397217,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_coverage_0": 0.10749451220035552,
|
|
"rewards/frontier_coverage_1": 0.10749451220035552,
|
|
"rewards/frontier_coverage_10": 0.10749451220035552,
|
|
"rewards/frontier_coverage_15": 0.10749451220035552,
|
|
"rewards/frontier_coverage_20": 0.10749451220035552,
|
|
"rewards/frontier_coverage_25": 0.10749451220035552,
|
|
"rewards/frontier_coverage_5": 0.10749451220035552,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.131561279296875,
|
|
"signal/accuracy_reward/group_std_mean": 0.17328137457370757,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.50625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0657806396484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0657806396484375,
|
|
"signal/advantage_abs_mean": 0.07330347746610641,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07330347746610641,
|
|
"signal/advantage_pre_scale_std": 0.11479498445987701,
|
|
"signal/advantage_std": 0.11479498445987701,
|
|
"signal/brier_reward/centered_abs_mean": 0.1852072387933731,
|
|
"signal/brier_reward/group_std_mean": 0.23257068395614625,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018520724028348923,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018520724028348923,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02673105485737324,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03708546310663223,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026731055695563555,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026731055695563555,
|
|
"signal/format_reward/centered_abs_mean": 0.0018798828125,
|
|
"signal/format_reward/group_std_mean": 0.005187963135540485,
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00093994140625,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2502656430006027,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3177207946777344,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0035787987522780894,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035787987522780894,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2502656430006027,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3177207946777344,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035787987522780894,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035787987522780894,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2502656430006027,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3177207946777344,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035787987522780894,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035787987522780894,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2502656430006027,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3177207946777344,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035787987522780894,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035787987522780894,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2502656430006027,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3177207946777344,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035787987522780894,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035787987522780894,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2502656430006027,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3177207946777344,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035787987522780894,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035787987522780894,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2502656430006027,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3177207946777344,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035787987522780894,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035787987522780894,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2517693569418261,
|
|
"calibration/batch_distribution_entropy": 0.9731332859372502,
|
|
"calibration/buffer_distribution_entropy": 0.9755470528339438,
|
|
"calibration/confidence_entropy": 0.44581740909361167,
|
|
"calibration/coverage@0%": 0.0125,
|
|
"calibration/coverage@1%": 0.0125,
|
|
"calibration/coverage@10%": 0.175390625,
|
|
"calibration/coverage@15%": 0.357421875,
|
|
"calibration/coverage@20%": 0.455078125,
|
|
"calibration/coverage@25%": 0.553515625,
|
|
"calibration/coverage@30%": 0.68046875,
|
|
"calibration/coverage@5%": 0.091796875,
|
|
"calibration/ece": 0.1454404464407266,
|
|
"calibration/mean_confidence": 0.5241102003842935,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 612.2,
|
|
"completions/max_terminated_length": 612.2,
|
|
"completions/mean_length": 186.9810546875,
|
|
"completions/mean_terminated_length": 187.07222900390624,
|
|
"completions/min_length": 51.2,
|
|
"completions/min_terminated_length": 81.8,
|
|
"epoch": 0.32,
|
|
"grad_norm": 0.0009133943822234869,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0003,
|
|
"num_tokens": 332989037.0,
|
|
"reward": 0.9471798062324523,
|
|
"reward_std": 0.082899671792984,
|
|
"rewards/accuracy_reward": 0.528515625,
|
|
"rewards/brier_reward": 0.767364501953125,
|
|
"rewards/confidence_uniqueness_reward": 0.9538532257080078,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_coverage_0": 0.11033322885632516,
|
|
"rewards/frontier_coverage_1": 0.11033322885632516,
|
|
"rewards/frontier_coverage_10": 0.11033322885632516,
|
|
"rewards/frontier_coverage_15": 0.11033322885632516,
|
|
"rewards/frontier_coverage_20": 0.11033322885632516,
|
|
"rewards/frontier_coverage_25": 0.11033322885632516,
|
|
"rewards/frontier_coverage_5": 0.11033322885632516,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0957275390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.13498952388763427,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04786376953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04786376953125,
|
|
"signal/advantage_abs_mean": 0.06182228252291679,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06182228252291679,
|
|
"signal/advantage_pre_scale_std": 0.10263902097940444,
|
|
"signal/advantage_std": 0.10263902097940444,
|
|
"signal/brier_reward/centered_abs_mean": 0.1715441018342972,
|
|
"signal/brier_reward/group_std_mean": 0.21891236901283265,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01715441085398197,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01715441085398197,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023247013986110687,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03069368377327919,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023247014032676816,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023247014032676816,
|
|
"signal/format_reward/centered_abs_mean": 0.000933837890625,
|
|
"signal/format_reward/group_std_mean": 0.0024258273653686045,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21744668185710908,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2794205367565155,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003109487472102046,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003109487472102046,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21744668185710908,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2794205367565155,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003109487472102046,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003109487472102046,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21744668185710908,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2794205367565155,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003109487472102046,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003109487472102046,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21744668185710908,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2794205367565155,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003109487472102046,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003109487472102046,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21744668185710908,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2794205367565155,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003109487472102046,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003109487472102046,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21744668185710908,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2794205367565155,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003109487472102046,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003109487472102046,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21744668185710908,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2794205367565155,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003109487472102046,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003109487472102046,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"eval_calibration/aurc": 0.4931819979797931,
|
|
"eval_calibration/batch_distribution_entropy": 0.8744966415146431,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9766198744091587,
|
|
"eval_calibration/confidence_entropy": 0.41882069482680984,
|
|
"eval_calibration/coverage@0%": 0.0234375,
|
|
"eval_calibration/coverage@1%": 0.0234375,
|
|
"eval_calibration/coverage@10%": 0.0234375,
|
|
"eval_calibration/coverage@15%": 0.078125,
|
|
"eval_calibration/coverage@20%": 0.078125,
|
|
"eval_calibration/coverage@25%": 0.1171875,
|
|
"eval_calibration/coverage@30%": 0.203125,
|
|
"eval_calibration/coverage@5%": 0.0234375,
|
|
"eval_calibration/ece": 0.24737018579060577,
|
|
"eval_calibration/mean_confidence": 0.44411252086535724,
|
|
"eval_completions/clipped_ratio": 0.002155172413793094,
|
|
"eval_completions/max_length": 431.5,
|
|
"eval_completions/max_terminated_length": 431.5,
|
|
"eval_completions/mean_length": 193.44598770141602,
|
|
"eval_completions/mean_terminated_length": 193.86253356933594,
|
|
"eval_completions/min_length": 78.75,
|
|
"eval_completions/min_terminated_length": 106.75,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 332989037.0,
|
|
"eval_reward": 0.8956393599510193,
|
|
"eval_reward_std": 0.21798087283968925,
|
|
"eval_rewards/accuracy_reward": 0.41796875,
|
|
"eval_rewards/brier_reward": 0.779339388012886,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.891444057226181,
|
|
"eval_rewards/format_reward": 0.998046875,
|
|
"eval_rewards/frontier_coverage_0": 0.20532679185271263,
|
|
"eval_rewards/frontier_coverage_1": 0.20532679185271263,
|
|
"eval_rewards/frontier_coverage_10": 0.20532679185271263,
|
|
"eval_rewards/frontier_coverage_15": 0.20532679185271263,
|
|
"eval_rewards/frontier_coverage_20": 0.20532679185271263,
|
|
"eval_rewards/frontier_coverage_25": 0.20532679185271263,
|
|
"eval_rewards/frontier_coverage_5": 0.20532679185271263,
|
|
"eval_runtime": 30.2085,
|
|
"eval_samples_per_second": 16.552,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.474609375,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4946169927716255,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2373046875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2373046875,
|
|
"eval_signal/advantage_abs_mean": 0.19964107125997543,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.19964107125997543,
|
|
"eval_signal/advantage_pre_scale_std": 0.21583576127886772,
|
|
"eval_signal/advantage_std": 0.21583576127886772,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.21933908015489578,
|
|
"eval_signal/brier_reward/group_std_mean": 0.27718352526426315,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021933908574283123,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.021933908574283123,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.049210578203201294,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.060637932270765305,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004921057727187872,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004921057727187872,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
|
|
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.41015545278787613,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.5025398880243301,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005865223123691976,
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005865223123691976,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.41015545278787613,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.5025398880243301,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005865223123691976,
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005865223123691976,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.41015545278787613,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.5025398880243301,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005865223123691976,
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005865223123691976,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.41015545278787613,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.5025398880243301,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005865223123691976,
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005865223123691976,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.41015545278787613,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.5025398880243301,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005865223123691976,
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005865223123691976,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.41015545278787613,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.5025398880243301,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005865223123691976,
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005865223123691976,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.41015545278787613,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.5025398880243301,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005865223123691976,
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005865223123691976,
|
|
"eval_steps_per_second": 0.132,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31803164040980525,
|
|
"calibration/batch_distribution_entropy": 0.9603039380902738,
|
|
"calibration/buffer_distribution_entropy": 0.9789385627675109,
|
|
"calibration/confidence_entropy": 0.4421431351101009,
|
|
"calibration/coverage@0%": 0.004296875,
|
|
"calibration/coverage@1%": 0.004296875,
|
|
"calibration/coverage@10%": 0.01328125,
|
|
"calibration/coverage@15%": 0.06171875,
|
|
"calibration/coverage@20%": 0.12617951932485322,
|
|
"calibration/coverage@25%": 0.32703644814090016,
|
|
"calibration/coverage@30%": 0.5084095217710372,
|
|
"calibration/coverage@5%": 0.004296875,
|
|
"calibration/ece": 0.1597455389352755,
|
|
"calibration/mean_confidence": 0.4623863560801051,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 586.4,
|
|
"completions/max_terminated_length": 586.4,
|
|
"completions/mean_length": 194.1740234375,
|
|
"completions/mean_terminated_length": 194.2508087158203,
|
|
"completions/min_length": 16.8,
|
|
"completions/min_terminated_length": 83.4,
|
|
"epoch": 0.336,
|
|
"grad_norm": 0.0010002412600442767,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 349699811.0,
|
|
"reward": 0.946380615234375,
|
|
"reward_std": 0.09110937118530274,
|
|
"rewards/accuracy_reward": 0.53134765625,
|
|
"rewards/brier_reward": 0.7578034162521362,
|
|
"rewards/confidence_uniqueness_reward": 0.953113317489624,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_coverage_0": 0.09946960732340812,
|
|
"rewards/frontier_coverage_1": 0.09946960732340812,
|
|
"rewards/frontier_coverage_10": 0.09946960732340812,
|
|
"rewards/frontier_coverage_15": 0.09946960732340812,
|
|
"rewards/frontier_coverage_20": 0.09946960732340812,
|
|
"rewards/frontier_coverage_25": 0.09946960732340812,
|
|
"rewards/frontier_coverage_5": 0.09946960732340812,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.113885498046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.15445185005664824,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.540625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0569427490234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0569427490234375,
|
|
"signal/advantage_abs_mean": 0.06859076172113418,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06859076172113418,
|
|
"signal/advantage_pre_scale_std": 0.11051983535289764,
|
|
"signal/advantage_std": 0.11051983535289764,
|
|
"signal/brier_reward/centered_abs_mean": 0.17820558547973633,
|
|
"signal/brier_reward/group_std_mean": 0.22512085735797882,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017820559069514276,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017820559069514276,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023565568774938584,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03247289955615997,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023565569426864386,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023565569426864386,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_std_mean": 0.0038669900968670845,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2296843409538269,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2940028965473175,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003284486150369048,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003284486150369048,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2296843409538269,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2940028965473175,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003284486150369048,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003284486150369048,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2296843409538269,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2940028965473175,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003284486150369048,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003284486150369048,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2296843409538269,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2940028965473175,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003284486150369048,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003284486150369048,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2296843409538269,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2940028965473175,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003284486150369048,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003284486150369048,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2296843409538269,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2940028965473175,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003284486150369048,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003284486150369048,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2296843409538269,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2940028965473175,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003284486150369048,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003284486150369048,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.33995086829546933,
|
|
"calibration/batch_distribution_entropy": 0.9172722143051842,
|
|
"calibration/buffer_distribution_entropy": 0.9850987655947868,
|
|
"calibration/confidence_entropy": 0.39972877336110646,
|
|
"calibration/coverage@0%": 0.006271440082882468,
|
|
"calibration/coverage@1%": 0.006271440082882468,
|
|
"calibration/coverage@10%": 0.1462585299429609,
|
|
"calibration/coverage@15%": 0.268205854981136,
|
|
"calibration/coverage@20%": 0.3480707272055084,
|
|
"calibration/coverage@25%": 0.40918934329610385,
|
|
"calibration/coverage@30%": 0.46561599619623495,
|
|
"calibration/coverage@5%": 0.014506734200529527,
|
|
"calibration/ece": 0.14900923213787592,
|
|
"calibration/mean_confidence": 0.42059892334335897,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00126953125,
|
|
"completions/max_length": 620.8,
|
|
"completions/max_terminated_length": 620.8,
|
|
"completions/mean_length": 200.08525390625,
|
|
"completions/mean_terminated_length": 200.34373168945314,
|
|
"completions/min_length": 17.2,
|
|
"completions/min_terminated_length": 92.2,
|
|
"epoch": 0.352,
|
|
"grad_norm": 0.001090504345484078,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.001,
|
|
"num_tokens": 367009100.0,
|
|
"reward": 0.9180683374404908,
|
|
"reward_std": 0.09404533058404922,
|
|
"rewards/accuracy_reward": 0.4623046875,
|
|
"rewards/brier_reward": 0.7684449315071106,
|
|
"rewards/confidence_uniqueness_reward": 0.9460180521011352,
|
|
"rewards/format_reward": 0.99853515625,
|
|
"rewards/frontier_coverage_0": 0.16185927987098694,
|
|
"rewards/frontier_coverage_1": 0.16185927987098694,
|
|
"rewards/frontier_coverage_10": 0.16185927987098694,
|
|
"rewards/frontier_coverage_15": 0.16185927987098694,
|
|
"rewards/frontier_coverage_20": 0.16185927987098694,
|
|
"rewards/frontier_coverage_25": 0.16185927987098694,
|
|
"rewards/frontier_coverage_5": 0.16185927987098694,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.11962890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1559920936822891,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.559375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.059814453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.059814453125,
|
|
"signal/advantage_abs_mean": 0.0710756614804268,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0710756614804268,
|
|
"signal/advantage_pre_scale_std": 0.11461780071258545,
|
|
"signal/advantage_std": 0.11461780071258545,
|
|
"signal/brier_reward/centered_abs_mean": 0.17437887489795684,
|
|
"signal/brier_reward/group_std_mean": 0.2231445223093033,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01743788719177246,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01743788719177246,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02977934628725052,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0412301205098629,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029779347125440834,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029779347125440834,
|
|
"signal/format_reward/centered_abs_mean": 0.002813720703125,
|
|
"signal/format_reward/group_std_mean": 0.007613790640607476,
|
|
"signal/format_reward/group_zero_std_frac": 0.959375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014068603515625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0014068603515625,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2336806982755661,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2980573236942291,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003341634012758732,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003341634012758732,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2336806982755661,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2980573236942291,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003341634012758732,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003341634012758732,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2336806982755661,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2980573236942291,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003341634012758732,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003341634012758732,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2336806982755661,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2980573236942291,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003341634012758732,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003341634012758732,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2336806982755661,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2980573236942291,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003341634012758732,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003341634012758732,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2336806982755661,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2980573236942291,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003341634012758732,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003341634012758732,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2336806982755661,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2980573236942291,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003341634012758732,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003341634012758732,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3871581697347807,
|
|
"calibration/batch_distribution_entropy": 0.9375974113072278,
|
|
"calibration/buffer_distribution_entropy": 0.9893474272345537,
|
|
"calibration/confidence_entropy": 0.4084952818758949,
|
|
"calibration/coverage@0%": 0.00078125,
|
|
"calibration/coverage@1%": 0.00078125,
|
|
"calibration/coverage@10%": 0.00078125,
|
|
"calibration/coverage@15%": 0.008203125,
|
|
"calibration/coverage@20%": 0.17043328033268101,
|
|
"calibration/coverage@25%": 0.3175276724559687,
|
|
"calibration/coverage@30%": 0.4125,
|
|
"calibration/coverage@5%": 0.00078125,
|
|
"calibration/ece": 0.16002306553664605,
|
|
"calibration/mean_confidence": 0.5115657670381608,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00146484375,
|
|
"completions/max_length": 702.2,
|
|
"completions/max_terminated_length": 702.2,
|
|
"completions/mean_length": 202.5775390625,
|
|
"completions/mean_terminated_length": 202.87572021484374,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 95.2,
|
|
"epoch": 0.368,
|
|
"grad_norm": 0.0008818231872282922,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0009,
|
|
"num_tokens": 384148966.0,
|
|
"reward": 0.9308542728424072,
|
|
"reward_std": 0.09076899141073227,
|
|
"rewards/accuracy_reward": 0.49150390625,
|
|
"rewards/brier_reward": 0.7683249115943909,
|
|
"rewards/confidence_uniqueness_reward": 0.9473673701286316,
|
|
"rewards/format_reward": 0.9984375,
|
|
"rewards/frontier_coverage_0": 0.14300051778554917,
|
|
"rewards/frontier_coverage_1": 0.14300051778554917,
|
|
"rewards/frontier_coverage_10": 0.14300051778554917,
|
|
"rewards/frontier_coverage_15": 0.14300051778554917,
|
|
"rewards/frontier_coverage_20": 0.14300051778554917,
|
|
"rewards/frontier_coverage_25": 0.14300051778554917,
|
|
"rewards/frontier_coverage_5": 0.14300051778554917,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.110247802734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.14572769552469253,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0551239013671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0551239013671875,
|
|
"signal/advantage_abs_mean": 0.06792233437299729,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06792233437299729,
|
|
"signal/advantage_pre_scale_std": 0.11378230005502701,
|
|
"signal/advantage_std": 0.11378230005502701,
|
|
"signal/brier_reward/centered_abs_mean": 0.17012497782707214,
|
|
"signal/brier_reward/group_std_mean": 0.21865971982479096,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017012498155236245,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017012498155236245,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028270235285162926,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03985480517148972,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002827023435384035,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002827023435384035,
|
|
"signal/format_reward/centered_abs_mean": 0.00301513671875,
|
|
"signal/format_reward/group_std_mean": 0.008502526115626097,
|
|
"signal/format_reward/group_zero_std_frac": 0.953125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001507568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.001507568359375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21495278179645538,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.27791267037391665,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0030738247092813253,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030738247092813253,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21495278179645538,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27791267037391665,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030738247092813253,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030738247092813253,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21495278179645538,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27791267037391665,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030738247092813253,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030738247092813253,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21495278179645538,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27791267037391665,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030738247092813253,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030738247092813253,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21495278179645538,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27791267037391665,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030738247092813253,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030738247092813253,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21495278179645538,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27791267037391665,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030738247092813253,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030738247092813253,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21495278179645538,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27791267037391665,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030738247092813253,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030738247092813253,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.33271643651355365,
|
|
"calibration/batch_distribution_entropy": 0.9126303336176506,
|
|
"calibration/buffer_distribution_entropy": 0.9914148505835181,
|
|
"calibration/confidence_entropy": 0.39177509431275864,
|
|
"calibration/coverage@0%": 0.016037841524134212,
|
|
"calibration/coverage@1%": 0.016037841524134212,
|
|
"calibration/coverage@10%": 0.15238588073982048,
|
|
"calibration/coverage@15%": 0.23256510868099695,
|
|
"calibration/coverage@20%": 0.2900405988770754,
|
|
"calibration/coverage@25%": 0.332675402798644,
|
|
"calibration/coverage@30%": 0.38508195917119303,
|
|
"calibration/coverage@5%": 0.07093980230844793,
|
|
"calibration/ece": 0.14636101944079075,
|
|
"calibration/mean_confidence": 0.45591531154083825,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00146484375,
|
|
"completions/max_length": 754.0,
|
|
"completions/max_terminated_length": 754.0,
|
|
"completions/mean_length": 201.73955078125,
|
|
"completions/mean_terminated_length": 202.03497009277345,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 83.4,
|
|
"epoch": 0.384,
|
|
"grad_norm": 0.0009589268011040986,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 401071291.0,
|
|
"reward": 0.9465407133102417,
|
|
"reward_std": 0.08997839242219925,
|
|
"rewards/accuracy_reward": 0.521875,
|
|
"rewards/brier_reward": 0.7804174304008484,
|
|
"rewards/confidence_uniqueness_reward": 0.9465129852294922,
|
|
"rewards/format_reward": 0.99853515625,
|
|
"rewards/frontier_coverage_0": 0.13628980442881583,
|
|
"rewards/frontier_coverage_1": 0.13628980442881583,
|
|
"rewards/frontier_coverage_10": 0.13628980442881583,
|
|
"rewards/frontier_coverage_15": 0.13628980442881583,
|
|
"rewards/frontier_coverage_20": 0.13628980442881583,
|
|
"rewards/frontier_coverage_25": 0.13628980442881583,
|
|
"rewards/frontier_coverage_5": 0.13628980442881583,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10885009765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.15104981660842895,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.546875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.054425048828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.054425048828125,
|
|
"signal/advantage_abs_mean": 0.06531496718525887,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06531496718525887,
|
|
"signal/advantage_pre_scale_std": 0.11181586831808091,
|
|
"signal/advantage_std": 0.11181586831808091,
|
|
"signal/brier_reward/centered_abs_mean": 0.16072153747081758,
|
|
"signal/brier_reward/group_std_mean": 0.20803788006305696,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016072153858840466,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016072153858840466,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027308131381869317,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0376481682062149,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027308131102472544,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027308131102472544,
|
|
"signal/format_reward/centered_abs_mean": 0.002789306640625,
|
|
"signal/format_reward/group_std_mean": 0.0069411737378686665,
|
|
"signal/format_reward/group_zero_std_frac": 0.965625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0013946533203125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0013946533203125,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2067556768655777,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.26987250447273253,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002956606028601527,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002956606028601527,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2067556768655777,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26987250447273253,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002956606028601527,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002956606028601527,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2067556768655777,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26987250447273253,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002956606028601527,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002956606028601527,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2067556768655777,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26987250447273253,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002956606028601527,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002956606028601527,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2067556768655777,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.26987250447273253,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002956606028601527,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002956606028601527,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2067556768655777,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.26987250447273253,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002956606028601527,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002956606028601527,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2067556768655777,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.26987250447273253,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002956606028601527,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002956606028601527,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.42876697440671235,
|
|
"calibration/batch_distribution_entropy": 0.949518661468977,
|
|
"calibration/buffer_distribution_entropy": 0.9917979609259282,
|
|
"calibration/confidence_entropy": 0.42313672076600994,
|
|
"calibration/coverage@0%": 0.005087316176470588,
|
|
"calibration/coverage@1%": 0.005087316176470588,
|
|
"calibration/coverage@10%": 0.005087316176470588,
|
|
"calibration/coverage@15%": 0.019583333333333335,
|
|
"calibration/coverage@20%": 0.023880208333333333,
|
|
"calibration/coverage@25%": 0.048635982184375724,
|
|
"calibration/coverage@30%": 0.22175975306130358,
|
|
"calibration/coverage@5%": 0.005087316176470588,
|
|
"calibration/ece": 0.2017280795755636,
|
|
"calibration/mean_confidence": 0.48690404596470865,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00146484375,
|
|
"completions/max_length": 497.6,
|
|
"completions/max_terminated_length": 497.6,
|
|
"completions/mean_length": 198.98349609375,
|
|
"completions/mean_terminated_length": 199.2773651123047,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 92.2,
|
|
"epoch": 0.4,
|
|
"grad_norm": 0.0010078635532408953,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.001,
|
|
"num_tokens": 418145330.0,
|
|
"reward": 0.9326099157333374,
|
|
"reward_std": 0.09904382973909379,
|
|
"rewards/accuracy_reward": 0.49951171875,
|
|
"rewards/brier_reward": 0.7598520636558532,
|
|
"rewards/confidence_uniqueness_reward": 0.9458034992218017,
|
|
"rewards/format_reward": 0.9984375,
|
|
"rewards/frontier_coverage_0": 0.1308102782815695,
|
|
"rewards/frontier_coverage_1": 0.1308102782815695,
|
|
"rewards/frontier_coverage_10": 0.1308102782815695,
|
|
"rewards/frontier_coverage_15": 0.1308102782815695,
|
|
"rewards/frontier_coverage_20": 0.1308102782815695,
|
|
"rewards/frontier_coverage_25": 0.12910652123391628,
|
|
"rewards/frontier_coverage_5": 0.1308102782815695,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.128631591796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.17192812263965607,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.496875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0643157958984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0643157958984375,
|
|
"signal/advantage_abs_mean": 0.07444410920143127,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07444410920143127,
|
|
"signal/advantage_pre_scale_std": 0.12136494815349579,
|
|
"signal/advantage_std": 0.12136494815349579,
|
|
"signal/brier_reward/centered_abs_mean": 0.1757282793521881,
|
|
"signal/brier_reward/group_std_mean": 0.22406087815761566,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017572828009724616,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017572828009724616,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027075739949941634,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03780955001711846,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027075740043073894,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027075740043073894,
|
|
"signal/format_reward/centered_abs_mean": 0.0029541015625,
|
|
"signal/format_reward/group_std_mean": 0.007679159566760063,
|
|
"signal/format_reward/group_zero_std_frac": 0.959375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00147705078125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00147705078125,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21751558780670166,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.28371057510375974,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0031104729045182467,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031104729045182467,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21751558780670166,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.28371057510375974,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031104729045182467,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031104729045182467,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21751558780670166,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.28371057510375974,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031104729045182467,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031104729045182467,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21751558780670166,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.28371057510375974,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031104729045182467,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031104729045182467,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21751558780670166,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.28371057510375974,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031104729045182467,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031104729045182467,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21508013904094697,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2806576728820801,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003075646050274372,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003075646050274372,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21751558780670166,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.28371057510375974,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031104729045182467,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031104729045182467,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31364459658875965,
|
|
"calibration/batch_distribution_entropy": 0.9483515066753176,
|
|
"calibration/buffer_distribution_entropy": 0.9914676447275372,
|
|
"calibration/confidence_entropy": 0.4294093524780046,
|
|
"calibration/coverage@0%": 0.004694406925343812,
|
|
"calibration/coverage@1%": 0.004694406925343812,
|
|
"calibration/coverage@10%": 0.03828815692534381,
|
|
"calibration/coverage@15%": 0.0996162819253438,
|
|
"calibration/coverage@20%": 0.18711628192534382,
|
|
"calibration/coverage@25%": 0.3008249938605108,
|
|
"calibration/coverage@30%": 0.40244044695481335,
|
|
"calibration/coverage@5%": 0.004694406925343812,
|
|
"calibration/ece": 0.130821016745361,
|
|
"calibration/mean_confidence": 0.5221690310011222,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00126953125,
|
|
"completions/max_length": 717.4,
|
|
"completions/max_terminated_length": 717.4,
|
|
"completions/mean_length": 199.20458984375,
|
|
"completions/mean_terminated_length": 199.45704650878906,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 88.4,
|
|
"epoch": 0.416,
|
|
"grad_norm": 0.0010729380883276463,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0005,
|
|
"num_tokens": 435066369.0,
|
|
"reward": 0.9395390391349793,
|
|
"reward_std": 0.09434663653373718,
|
|
"rewards/accuracy_reward": 0.51064453125,
|
|
"rewards/brier_reward": 0.7705135226249695,
|
|
"rewards/confidence_uniqueness_reward": 0.9472472310066223,
|
|
"rewards/format_reward": 0.9986328125,
|
|
"rewards/frontier_coverage_0": 0.13238348066806793,
|
|
"rewards/frontier_coverage_1": 0.13238348066806793,
|
|
"rewards/frontier_coverage_10": 0.13238348066806793,
|
|
"rewards/frontier_coverage_15": 0.13238348066806793,
|
|
"rewards/frontier_coverage_20": 0.13238348066806793,
|
|
"rewards/frontier_coverage_25": 0.12348232418298721,
|
|
"rewards/frontier_coverage_5": 0.13238348066806793,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.124066162109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.15881072282791137,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0620330810546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0620330810546875,
|
|
"signal/advantage_abs_mean": 0.0726077377796173,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0726077377796173,
|
|
"signal/advantage_pre_scale_std": 0.11807905584573745,
|
|
"signal/advantage_std": 0.11807905584573745,
|
|
"signal/brier_reward/centered_abs_mean": 0.17476985454559327,
|
|
"signal/brier_reward/group_std_mean": 0.22144999206066132,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01747698597609997,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01747698597609997,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026467961445450782,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03703402951359749,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002646796219050884,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002646796219050884,
|
|
"signal/format_reward/centered_abs_mean": 0.00264892578125,
|
|
"signal/format_reward/group_std_mean": 0.007733980286866426,
|
|
"signal/format_reward/group_zero_std_frac": 0.95625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.001324462890625,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.22557145953178406,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.287382698059082,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0032256717327982186,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0032256717327982186,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22557145953178406,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.287382698059082,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032256717327982186,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032256717327982186,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22557145953178406,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.287382698059082,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032256717327982186,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032256717327982186,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22557145953178406,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.287382698059082,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032256717327982186,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032256717327982186,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22557145953178406,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.287382698059082,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032256717327982186,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032256717327982186,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2027723640203476,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.25904818475246427,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002899644710123539,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002899644710123539,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22557145953178406,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.287382698059082,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032256717327982186,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032256717327982186,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27631575926441354,
|
|
"calibration/batch_distribution_entropy": 0.9411343032566624,
|
|
"calibration/buffer_distribution_entropy": 0.9906036163305982,
|
|
"calibration/confidence_entropy": 0.4103170827497781,
|
|
"calibration/coverage@0%": 0.00703125,
|
|
"calibration/coverage@1%": 0.00703125,
|
|
"calibration/coverage@10%": 0.107421875,
|
|
"calibration/coverage@15%": 0.148828125,
|
|
"calibration/coverage@20%": 0.3464935661764706,
|
|
"calibration/coverage@25%": 0.42737745098039215,
|
|
"calibration/coverage@30%": 0.5640977328431372,
|
|
"calibration/coverage@5%": 0.03203125,
|
|
"calibration/ece": 0.12333647071900052,
|
|
"calibration/mean_confidence": 0.5359197657645645,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 630.6,
|
|
"completions/max_terminated_length": 630.6,
|
|
"completions/mean_length": 193.8431640625,
|
|
"completions/mean_terminated_length": 193.9956481933594,
|
|
"completions/min_length": 18.0,
|
|
"completions/min_terminated_length": 89.2,
|
|
"epoch": 0.432,
|
|
"grad_norm": 0.0013667960884049535,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 452065659.0,
|
|
"reward": 0.9584846138954163,
|
|
"reward_std": 0.08736461549997329,
|
|
"rewards/accuracy_reward": 0.54599609375,
|
|
"rewards/brier_reward": 0.7880683302879333,
|
|
"rewards/confidence_uniqueness_reward": 0.9461393594741822,
|
|
"rewards/format_reward": 0.99912109375,
|
|
"rewards/frontier_coverage_0": 0.127962838858366,
|
|
"rewards/frontier_coverage_1": 0.127962838858366,
|
|
"rewards/frontier_coverage_10": 0.127962838858366,
|
|
"rewards/frontier_coverage_15": 0.127962838858366,
|
|
"rewards/frontier_coverage_20": 0.127962838858366,
|
|
"rewards/frontier_coverage_25": 0.10671568959951401,
|
|
"rewards/frontier_coverage_5": 0.127962838858366,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.118438720703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.15208634585142136,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0592193603515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0592193603515625,
|
|
"signal/advantage_abs_mean": 0.06664500907063484,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06664500907063484,
|
|
"signal/advantage_pre_scale_std": 0.11240910291671753,
|
|
"signal/advantage_std": 0.11240910291671753,
|
|
"signal/brier_reward/centered_abs_mean": 0.1604565739631653,
|
|
"signal/brier_reward/group_std_mean": 0.20421489775180818,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01604565791785717,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01604565791785717,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027118064090609552,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.037455400079488756,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027118063997477295,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027118063997477295,
|
|
"signal/format_reward/centered_abs_mean": 0.001702880859375,
|
|
"signal/format_reward/group_std_mean": 0.0049718443769961596,
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21060441732406615,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.269395238161087,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0030116431415081026,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030116431415081026,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21060441732406615,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.269395238161087,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030116431415081026,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030116431415081026,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21060441732406615,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.269395238161087,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030116431415081026,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030116431415081026,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21060441732406615,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.269395238161087,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030116431415081026,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030116431415081026,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21060441732406615,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.269395238161087,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030116431415081026,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030116431415081026,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16856006383895875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.21680730879306792,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002410408854484558,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002410408854484558,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21060441732406615,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.269395238161087,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030116431415081026,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030116431415081026,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28610566413293276,
|
|
"calibration/batch_distribution_entropy": 0.941972818254718,
|
|
"calibration/buffer_distribution_entropy": 0.9895541282721396,
|
|
"calibration/confidence_entropy": 0.4368501676028683,
|
|
"calibration/coverage@0%": 0.013307240704500978,
|
|
"calibration/coverage@1%": 0.013307240704500978,
|
|
"calibration/coverage@10%": 0.07397260273972603,
|
|
"calibration/coverage@15%": 0.17565970523483365,
|
|
"calibration/coverage@20%": 0.24409781678082193,
|
|
"calibration/coverage@25%": 0.35203797700587086,
|
|
"calibration/coverage@30%": 0.501732968444227,
|
|
"calibration/coverage@5%": 0.05636007827788649,
|
|
"calibration/ece": 0.148974853332463,
|
|
"calibration/mean_confidence": 0.5850199136094146,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00068359375,
|
|
"completions/max_length": 609.4,
|
|
"completions/max_terminated_length": 609.4,
|
|
"completions/mean_length": 197.78876953125,
|
|
"completions/mean_terminated_length": 197.92320861816407,
|
|
"completions/min_length": 18.0,
|
|
"completions/min_terminated_length": 89.6,
|
|
"epoch": 0.448,
|
|
"grad_norm": 0.0012304031988605857,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0002,
|
|
"num_tokens": 469043816.0,
|
|
"reward": 0.9455674409866333,
|
|
"reward_std": 0.08576287478208541,
|
|
"rewards/accuracy_reward": 0.51591796875,
|
|
"rewards/brier_reward": 0.7885148882865906,
|
|
"rewards/confidence_uniqueness_reward": 0.9510594606399536,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_coverage_0": 0.145331272482872,
|
|
"rewards/frontier_coverage_1": 0.145331272482872,
|
|
"rewards/frontier_coverage_10": 0.145331272482872,
|
|
"rewards/frontier_coverage_15": 0.145331272482872,
|
|
"rewards/frontier_coverage_20": 0.1450465127825737,
|
|
"rewards/frontier_coverage_25": 0.11706055402755737,
|
|
"rewards/frontier_coverage_5": 0.145331272482872,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.109393310546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1406890869140625,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0546966552734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0546966552734375,
|
|
"signal/advantage_abs_mean": 0.06536918431520462,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06536918431520462,
|
|
"signal/advantage_pre_scale_std": 0.11072720885276795,
|
|
"signal/advantage_std": 0.11072720885276795,
|
|
"signal/brier_reward/centered_abs_mean": 0.15777938365936278,
|
|
"signal/brier_reward/group_std_mean": 0.20213670134544373,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01577793899923563,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01577793899923563,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0231874518096447,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0322634294629097,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023187451995909216,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023187451995909216,
|
|
"signal/format_reward/centered_abs_mean": 0.00189208984375,
|
|
"signal/format_reward/group_std_mean": 0.005524271493777632,
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2044217973947525,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2626798987388611,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0029232318513095377,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029232318513095377,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2044217973947525,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2626798987388611,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029232318513095377,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029232318513095377,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2044217973947525,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2626798987388611,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029232318513095377,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029232318513095377,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2044217973947525,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2626798987388611,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029232318513095377,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029232318513095377,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2033557653427124,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2613858848810196,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029079874977469443,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029079874977469443,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.15121191143989562,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1960006058216095,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021623303182423115,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021623303182423115,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2044217973947525,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2626798987388611,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029232318513095377,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029232318513095377,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.41597604928666837,
|
|
"calibration/batch_distribution_entropy": 0.9587108118403496,
|
|
"calibration/buffer_distribution_entropy": 0.9883232106890395,
|
|
"calibration/confidence_entropy": 0.4468210218563723,
|
|
"calibration/coverage@0%": 0.005078125,
|
|
"calibration/coverage@1%": 0.005078125,
|
|
"calibration/coverage@10%": 0.005078125,
|
|
"calibration/coverage@15%": 0.00546875,
|
|
"calibration/coverage@20%": 0.059765625,
|
|
"calibration/coverage@25%": 0.09765625,
|
|
"calibration/coverage@30%": 0.26015625,
|
|
"calibration/coverage@5%": 0.005078125,
|
|
"calibration/ece": 0.15567187863866366,
|
|
"calibration/mean_confidence": 0.5199681562651445,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00068359375,
|
|
"completions/max_length": 530.8,
|
|
"completions/max_terminated_length": 530.8,
|
|
"completions/mean_length": 198.5326171875,
|
|
"completions/mean_terminated_length": 198.66876831054688,
|
|
"completions/min_length": 35.6,
|
|
"completions/min_terminated_length": 88.8,
|
|
"epoch": 0.464,
|
|
"grad_norm": 0.0010460478952154517,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 486247606.0,
|
|
"reward": 0.9150034546852112,
|
|
"reward_std": 0.08174609690904618,
|
|
"rewards/accuracy_reward": 0.4603515625,
|
|
"rewards/brier_reward": 0.7551176190376282,
|
|
"rewards/confidence_uniqueness_reward": 0.949243712425232,
|
|
"rewards/format_reward": 0.99912109375,
|
|
"rewards/frontier_coverage_0": 0.15224866196513176,
|
|
"rewards/frontier_coverage_1": 0.15224866196513176,
|
|
"rewards/frontier_coverage_10": 0.15224866196513176,
|
|
"rewards/frontier_coverage_15": 0.15224866196513176,
|
|
"rewards/frontier_coverage_20": 0.15224866196513176,
|
|
"rewards/frontier_coverage_25": 0.12363973185420037,
|
|
"rewards/frontier_coverage_5": 0.15224866196513176,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0911376953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12455651462078095,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04556884765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04556884765625,
|
|
"signal/advantage_abs_mean": 0.06125259175896645,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06125259175896645,
|
|
"signal/advantage_pre_scale_std": 0.10643679648637772,
|
|
"signal/advantage_std": 0.10643679648637772,
|
|
"signal/brier_reward/centered_abs_mean": 0.16171522736549376,
|
|
"signal/brier_reward/group_std_mean": 0.20533936619758605,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016171522811055182,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016171522811055182,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02406933158636093,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03289683237671852,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024069331819191577,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024069331819191577,
|
|
"signal/format_reward/centered_abs_mean": 0.001702880859375,
|
|
"signal/format_reward/group_std_mean": 0.004971844470128417,
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1879375845193863,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2440842032432556,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002687507402151823,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002687507402151823,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1879375845193863,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2440842032432556,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002687507402151823,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002687507402151823,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1879375845193863,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2440842032432556,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002687507402151823,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002687507402151823,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1879375845193863,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2440842032432556,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002687507402151823,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002687507402151823,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1879375845193863,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2440842032432556,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002687507402151823,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002687507402151823,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14551771879196168,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.19012450873851777,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020809032954275607,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020809032954275607,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1879375845193863,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2440842032432556,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002687507402151823,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002687507402151823,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2966352076482409,
|
|
"calibration/batch_distribution_entropy": 0.9417444696033105,
|
|
"calibration/buffer_distribution_entropy": 0.9854769117447294,
|
|
"calibration/confidence_entropy": 0.4245164194763583,
|
|
"calibration/coverage@0%": 0.0019561827299412913,
|
|
"calibration/coverage@1%": 0.0019561827299412913,
|
|
"calibration/coverage@10%": 0.04687805772994129,
|
|
"calibration/coverage@15%": 0.06523743272994129,
|
|
"calibration/coverage@20%": 0.26094208659491197,
|
|
"calibration/coverage@25%": 0.44967205846379643,
|
|
"calibration/coverage@30%": 0.5446397994129158,
|
|
"calibration/coverage@5%": 0.0019561827299412913,
|
|
"calibration/ece": 0.14131419788242233,
|
|
"calibration/mean_confidence": 0.5137820425893411,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00107421875,
|
|
"completions/max_length": 758.8,
|
|
"completions/max_terminated_length": 758.8,
|
|
"completions/mean_length": 195.35341796875,
|
|
"completions/mean_terminated_length": 195.5635498046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 89.8,
|
|
"epoch": 0.48,
|
|
"grad_norm": 0.0009436359978280962,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0008,
|
|
"num_tokens": 503296057.0,
|
|
"reward": 0.942992627620697,
|
|
"reward_std": 0.08964750766754151,
|
|
"rewards/accuracy_reward": 0.5171875,
|
|
"rewards/brier_reward": 0.771629810333252,
|
|
"rewards/confidence_uniqueness_reward": 0.9447030186653137,
|
|
"rewards/format_reward": 0.99892578125,
|
|
"rewards/frontier_coverage_0": 0.13579091578722,
|
|
"rewards/frontier_coverage_1": 0.13579091578722,
|
|
"rewards/frontier_coverage_10": 0.13579091578722,
|
|
"rewards/frontier_coverage_15": 0.13579091578722,
|
|
"rewards/frontier_coverage_20": 0.13579091578722,
|
|
"rewards/frontier_coverage_25": 0.1155124381184578,
|
|
"rewards/frontier_coverage_5": 0.13579091578722,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1217041015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.16184872686862944,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06085205078125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06085205078125,
|
|
"signal/advantage_abs_mean": 0.06639757454395294,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06639757454395294,
|
|
"signal/advantage_pre_scale_std": 0.11358063519001008,
|
|
"signal/advantage_std": 0.11358063519001008,
|
|
"signal/brier_reward/centered_abs_mean": 0.15432437360286713,
|
|
"signal/brier_reward/group_std_mean": 0.20063064694404603,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01543243769556284,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01543243769556284,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026983362436294556,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03629298433661461,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026983361691236495,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026983361691236495,
|
|
"signal/format_reward/centered_abs_mean": 0.002069091796875,
|
|
"signal/format_reward/group_std_mean": 0.005740390252321958,
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20254981517791748,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2641178369522095,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028964622411876916,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028964622411876916,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20254981517791748,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2641178369522095,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028964622411876916,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028964622411876916,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20254981517791748,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2641178369522095,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028964622411876916,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028964622411876916,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20254981517791748,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2641178369522095,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028964622411876916,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028964622411876916,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20254981517791748,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2641178369522095,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028964622411876916,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028964622411876916,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.15858907699584962,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.20831416547298431,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022678238339722155,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022678238339722155,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20254981517791748,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2641178369522095,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028964622411876916,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028964622411876916,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"eval_calibration/aurc": 0.4831069061903782,
|
|
"eval_calibration/batch_distribution_entropy": 0.8847234666383597,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9830679215380574,
|
|
"eval_calibration/confidence_entropy": 0.4128065002360819,
|
|
"eval_calibration/coverage@0%": 0.078125,
|
|
"eval_calibration/coverage@1%": 0.078125,
|
|
"eval_calibration/coverage@10%": 0.078125,
|
|
"eval_calibration/coverage@15%": 0.0859375,
|
|
"eval_calibration/coverage@20%": 0.125,
|
|
"eval_calibration/coverage@25%": 0.2109375,
|
|
"eval_calibration/coverage@30%": 0.2265625,
|
|
"eval_calibration/coverage@5%": 0.078125,
|
|
"eval_calibration/ece": 0.2611121955578006,
|
|
"eval_calibration/mean_confidence": 0.47455383892968606,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 403.0,
|
|
"eval_completions/max_terminated_length": 403.0,
|
|
"eval_completions/mean_length": 195.1864891052246,
|
|
"eval_completions/mean_terminated_length": 195.1864891052246,
|
|
"eval_completions/min_length": 101.0,
|
|
"eval_completions/min_terminated_length": 101.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 503296057.0,
|
|
"eval_reward": 0.8924962729215622,
|
|
"eval_reward_std": 0.2204424850642681,
|
|
"eval_rewards/accuracy_reward": 0.408203125,
|
|
"eval_rewards/brier_reward": 0.7837338745594025,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8876953125,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_coverage_0": 0.21979742124676704,
|
|
"eval_rewards/frontier_coverage_1": 0.21979742124676704,
|
|
"eval_rewards/frontier_coverage_10": 0.21979742124676704,
|
|
"eval_rewards/frontier_coverage_15": 0.21979742124676704,
|
|
"eval_rewards/frontier_coverage_20": 0.21979742124676704,
|
|
"eval_rewards/frontier_coverage_25": 0.1673554927110672,
|
|
"eval_rewards/frontier_coverage_5": 0.21979742124676704,
|
|
"eval_runtime": 21.217,
|
|
"eval_samples_per_second": 23.566,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4635009765625,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.48869405686855316,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23175048828125,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23175048828125,
|
|
"eval_signal/advantage_abs_mean": 0.20180771127343178,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20180771127343178,
|
|
"eval_signal/advantage_pre_scale_std": 0.21814486756920815,
|
|
"eval_signal/advantage_std": 0.21814486756920815,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.22955559566617012,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2795948311686516,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022955560591071844,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.022955560591071844,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.054779052734375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0656774491071701,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0054779056226834655,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0054779056226834655,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.3919490575790405,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.4804074615240097,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005604871432296932,
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005604871432296932,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3919490575790405,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4804074615240097,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005604871432296932,
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005604871432296932,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3919490575790405,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4804074615240097,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005604871432296932,
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005604871432296932,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3919490575790405,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4804074615240097,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005604871432296932,
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005604871432296932,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3919490575790405,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.4804074615240097,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005604871432296932,
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005604871432296932,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.30262789130210876,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.3734619617462158,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004327578702941537,
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004327578702941537,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3919490575790405,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4804074615240097,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005604871432296932,
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005604871432296932,
|
|
"eval_steps_per_second": 0.189,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3799576377700869,
|
|
"calibration/batch_distribution_entropy": 0.942893601281817,
|
|
"calibration/buffer_distribution_entropy": 0.9810508104668042,
|
|
"calibration/confidence_entropy": 0.416781882516017,
|
|
"calibration/coverage@0%": 0.005860139432485323,
|
|
"calibration/coverage@1%": 0.005860139432485323,
|
|
"calibration/coverage@10%": 0.11211013943248531,
|
|
"calibration/coverage@15%": 0.1527810053816047,
|
|
"calibration/coverage@20%": 0.18170101516634052,
|
|
"calibration/coverage@25%": 0.21533528008806263,
|
|
"calibration/coverage@30%": 0.289664180312536,
|
|
"calibration/coverage@5%": 0.04961013943248532,
|
|
"calibration/ece": 0.1558927200859373,
|
|
"calibration/mean_confidence": 0.5103141788755559,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00087890625,
|
|
"completions/max_length": 553.0,
|
|
"completions/max_terminated_length": 553.0,
|
|
"completions/mean_length": 194.48505859375,
|
|
"completions/mean_terminated_length": 194.6558349609375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 90.2,
|
|
"epoch": 0.496,
|
|
"grad_norm": 0.0008825138211250305,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 520595424.0,
|
|
"reward": 0.9555532336235046,
|
|
"reward_std": 0.08352845162153244,
|
|
"rewards/accuracy_reward": 0.54580078125,
|
|
"rewards/brier_reward": 0.7727875947952271,
|
|
"rewards/confidence_uniqueness_reward": 0.9481261730194092,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_coverage_0": 0.11409009248018265,
|
|
"rewards/frontier_coverage_1": 0.11409009248018265,
|
|
"rewards/frontier_coverage_10": 0.11409009248018265,
|
|
"rewards/frontier_coverage_15": 0.11409009248018265,
|
|
"rewards/frontier_coverage_20": 0.11409009248018265,
|
|
"rewards/frontier_coverage_25": 0.08816799521446228,
|
|
"rewards/frontier_coverage_5": 0.11409009248018265,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.103399658203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1389443188905716,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0516998291015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0516998291015625,
|
|
"signal/advantage_abs_mean": 0.06242571994662285,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06242571994662285,
|
|
"signal/advantage_pre_scale_std": 0.10876623839139939,
|
|
"signal/advantage_std": 0.10876623839139939,
|
|
"signal/brier_reward/centered_abs_mean": 0.1525803655385971,
|
|
"signal/brier_reward/group_std_mean": 0.19565546214580537,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01525803655385971,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01525803655385971,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024064848199486732,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0327242337167263,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024064849596470593,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024064849596470593,
|
|
"signal/format_reward/centered_abs_mean": 0.00189208984375,
|
|
"signal/format_reward/group_std_mean": 0.005524271540343762,
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18754335045814513,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24416738152503967,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002681869873777032,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002681869873777032,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18754335045814513,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24416738152503967,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002681869873777032,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002681869873777032,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18754335045814513,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24416738152503967,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002681869873777032,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002681869873777032,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18754335045814513,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24416738152503967,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002681869873777032,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002681869873777032,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18754335045814513,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24416738152503967,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002681869873777032,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002681869873777032,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14260709285736084,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.18698225021362305,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002039281511679292,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002039281511679292,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18754335045814513,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24416738152503967,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002681869873777032,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002681869873777032,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3433580891081784,
|
|
"calibration/batch_distribution_entropy": 0.9489904033296426,
|
|
"calibration/buffer_distribution_entropy": 0.9766839241875452,
|
|
"calibration/confidence_entropy": 0.43796031112504796,
|
|
"calibration/coverage@0%": 0.01333103404032846,
|
|
"calibration/coverage@1%": 0.01333103404032846,
|
|
"calibration/coverage@10%": 0.173933056099152,
|
|
"calibration/coverage@15%": 0.2659734972756226,
|
|
"calibration/coverage@20%": 0.34586997513046314,
|
|
"calibration/coverage@25%": 0.39675685171328806,
|
|
"calibration/coverage@30%": 0.4453351961503779,
|
|
"calibration/coverage@5%": 0.09614353404032847,
|
|
"calibration/ece": 0.16492687563132064,
|
|
"calibration/mean_confidence": 0.512453271287414,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0009765625,
|
|
"completions/max_length": 571.0,
|
|
"completions/max_terminated_length": 571.0,
|
|
"completions/mean_length": 186.35107421875,
|
|
"completions/mean_terminated_length": 186.5321838378906,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 92.6,
|
|
"epoch": 0.512,
|
|
"grad_norm": 0.0010090820724144578,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0002,
|
|
"num_tokens": 537649323.0,
|
|
"reward": 0.9542897462844848,
|
|
"reward_std": 0.0837359830737114,
|
|
"rewards/accuracy_reward": 0.5373046875,
|
|
"rewards/brier_reward": 0.7862473368644715,
|
|
"rewards/confidence_uniqueness_reward": 0.9487914681434632,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_coverage_0": 0.1308758407831192,
|
|
"rewards/frontier_coverage_1": 0.1308758407831192,
|
|
"rewards/frontier_coverage_10": 0.1308758407831192,
|
|
"rewards/frontier_coverage_15": 0.1308758407831192,
|
|
"rewards/frontier_coverage_20": 0.1308758407831192,
|
|
"rewards/frontier_coverage_25": 0.09738899916410446,
|
|
"rewards/frontier_coverage_5": 0.1308758407831192,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10369873046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.138237564265728,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.603125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051849365234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051849365234375,
|
|
"signal/advantage_abs_mean": 0.06327899843454361,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06327899843454361,
|
|
"signal/advantage_pre_scale_std": 0.11133654713630677,
|
|
"signal/advantage_std": 0.11133654713630677,
|
|
"signal/brier_reward/centered_abs_mean": 0.1488574415445328,
|
|
"signal/brier_reward/group_std_mean": 0.1922387957572937,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014885743707418441,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014885743707418441,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023807717114686967,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03172732964158058,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023807717021554708,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023807717021554708,
|
|
"signal/format_reward/centered_abs_mean": 0.00186767578125,
|
|
"signal/format_reward/group_std_mean": 0.00485165468417108,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000933837890625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000933837890625,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1794394850730896,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23912697434425353,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002565984660759568,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002565984660759568,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1794394850730896,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23912697434425353,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002565984660759568,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002565984660759568,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1794394850730896,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23912697434425353,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002565984660759568,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002565984660759568,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1794394850730896,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23912697434425353,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002565984660759568,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002565984660759568,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1794394850730896,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23912697434425353,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002565984660759568,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002565984660759568,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12620090395212175,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16993048191070556,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018046729266643525,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018046729266643525,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1794394850730896,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23912697434425353,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002565984660759568,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002565984660759568,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20960424511882794,
|
|
"calibration/batch_distribution_entropy": 0.937826372264357,
|
|
"calibration/buffer_distribution_entropy": 0.9732044406089282,
|
|
"calibration/confidence_entropy": 0.4191032450695021,
|
|
"calibration/coverage@0%": 0.017591884784735813,
|
|
"calibration/coverage@1%": 0.017591884784735813,
|
|
"calibration/coverage@10%": 0.2767230308219178,
|
|
"calibration/coverage@15%": 0.45409506482387474,
|
|
"calibration/coverage@20%": 0.5728779354207436,
|
|
"calibration/coverage@25%": 0.6584622676125245,
|
|
"calibration/coverage@30%": 0.7624204990215264,
|
|
"calibration/coverage@5%": 0.0547142551369863,
|
|
"calibration/ece": 0.11670693359510136,
|
|
"calibration/mean_confidence": 0.5251318956402845,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 606.6,
|
|
"completions/max_terminated_length": 606.6,
|
|
"completions/mean_length": 180.396875,
|
|
"completions/mean_terminated_length": 180.43230285644532,
|
|
"completions/min_length": 54.2,
|
|
"completions/min_terminated_length": 89.0,
|
|
"epoch": 0.528,
|
|
"grad_norm": 0.0013867069501429796,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 554526123.0,
|
|
"reward": 0.9590974926948548,
|
|
"reward_std": 0.07940471768379212,
|
|
"rewards/accuracy_reward": 0.5408203125,
|
|
"rewards/brier_reward": 0.799199378490448,
|
|
"rewards/confidence_uniqueness_reward": 0.9451055526733398,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_coverage_0": 0.14895989149808883,
|
|
"rewards/frontier_coverage_1": 0.14895989149808883,
|
|
"rewards/frontier_coverage_10": 0.14895989149808883,
|
|
"rewards/frontier_coverage_15": 0.14895989149808883,
|
|
"rewards/frontier_coverage_20": 0.14895989149808883,
|
|
"rewards/frontier_coverage_25": 0.11005051881074905,
|
|
"rewards/frontier_coverage_5": 0.14895989149808883,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1129638671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.14522374868392945,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05648193359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05648193359375,
|
|
"signal/advantage_abs_mean": 0.06193904280662536,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06193904280662536,
|
|
"signal/advantage_pre_scale_std": 0.1081055223941803,
|
|
"signal/advantage_std": 0.1081055223941803,
|
|
"signal/brier_reward/centered_abs_mean": 0.1428891509771347,
|
|
"signal/brier_reward/group_std_mean": 0.180334734916687,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014288916438817977,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014288916438817977,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026091757416725158,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03331942185759544,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002609175816178322,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002609175816178322,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18938855528831483,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2450707495212555,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00270825638435781,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00270825638435781,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18938855528831483,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2450707495212555,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00270825638435781,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00270825638435781,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18938855528831483,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2450707495212555,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00270825638435781,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00270825638435781,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18938855528831483,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2450707495212555,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00270825638435781,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00270825638435781,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18938855528831483,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2450707495212555,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00270825638435781,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00270825638435781,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1290591835975647,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16820741891860963,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018455463228747248,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018455463228747248,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18938855528831483,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2450707495212555,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00270825638435781,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00270825638435781,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22657361490672714,
|
|
"calibration/batch_distribution_entropy": 0.9040110531831956,
|
|
"calibration/buffer_distribution_entropy": 0.9696198518608401,
|
|
"calibration/confidence_entropy": 0.4071839362196341,
|
|
"calibration/coverage@0%": 0.028537793542074363,
|
|
"calibration/coverage@1%": 0.028537793542074363,
|
|
"calibration/coverage@10%": 0.1836938906555773,
|
|
"calibration/coverage@15%": 0.30716579011741685,
|
|
"calibration/coverage@20%": 0.5076190985812132,
|
|
"calibration/coverage@25%": 0.6631237769080235,
|
|
"calibration/coverage@30%": 0.7479207436399217,
|
|
"calibration/coverage@5%": 0.06372462084148728,
|
|
"calibration/ece": 0.09039096955456043,
|
|
"calibration/mean_confidence": 0.5459185071689842,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 513.6,
|
|
"completions/max_terminated_length": 513.6,
|
|
"completions/mean_length": 174.13134765625,
|
|
"completions/mean_terminated_length": 174.1650604248047,
|
|
"completions/min_length": 64.6,
|
|
"completions/min_terminated_length": 82.2,
|
|
"epoch": 0.544,
|
|
"grad_norm": 0.0009238318889401853,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 571472812.0,
|
|
"reward": 0.9676682591438294,
|
|
"reward_std": 0.08454482406377792,
|
|
"rewards/accuracy_reward": 0.56689453125,
|
|
"rewards/brier_reward": 0.7879523038864136,
|
|
"rewards/confidence_uniqueness_reward": 0.9500796794891357,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_coverage_0": 0.10987547188997268,
|
|
"rewards/frontier_coverage_1": 0.10987547188997268,
|
|
"rewards/frontier_coverage_10": 0.10987547188997268,
|
|
"rewards/frontier_coverage_15": 0.10987547188997268,
|
|
"rewards/frontier_coverage_20": 0.10958926826715469,
|
|
"rewards/frontier_coverage_25": 0.07979481071233749,
|
|
"rewards/frontier_coverage_5": 0.10987547188997268,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.115875244140625,
|
|
"signal/accuracy_reward/group_std_mean": 0.157163542509079,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0579376220703125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0579376220703125,
|
|
"signal/advantage_abs_mean": 0.06283592209219932,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06283592209219932,
|
|
"signal/advantage_pre_scale_std": 0.11006280481815338,
|
|
"signal/advantage_std": 0.11006280481815338,
|
|
"signal/brier_reward/centered_abs_mean": 0.1496346950531006,
|
|
"signal/brier_reward/group_std_mean": 0.19032892882823943,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014963469840586186,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014963469840586186,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023456166312098504,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03012530505657196,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002345616649836302,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002345616649836302,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18942977488040924,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2492722123861313,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027088457718491554,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027088457718491554,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18942977488040924,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2492722123861313,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027088457718491554,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027088457718491554,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18942977488040924,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2492722123861313,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027088457718491554,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027088457718491554,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18942977488040924,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2492722123861313,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027088457718491554,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027088457718491554,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18900564014911653,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24872445166110993,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027027806732803582,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027027806732803582,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1239845871925354,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1644377052783966,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017729795770719647,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017729795770719647,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18942977488040924,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2492722123861313,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027088457718491554,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027088457718491554,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2587221330992257,
|
|
"calibration/batch_distribution_entropy": 0.9351214394358193,
|
|
"calibration/buffer_distribution_entropy": 0.9666515299239423,
|
|
"calibration/confidence_entropy": 0.43241105274145475,
|
|
"calibration/coverage@0%": 0.065234375,
|
|
"calibration/coverage@1%": 0.10078125,
|
|
"calibration/coverage@10%": 0.231640625,
|
|
"calibration/coverage@15%": 0.298828125,
|
|
"calibration/coverage@20%": 0.360546875,
|
|
"calibration/coverage@25%": 0.492578125,
|
|
"calibration/coverage@30%": 0.56875,
|
|
"calibration/coverage@5%": 0.19453125,
|
|
"calibration/ece": 0.12503712805676448,
|
|
"calibration/mean_confidence": 0.5194717080207317,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 501.6,
|
|
"completions/max_terminated_length": 501.6,
|
|
"completions/mean_length": 171.4548828125,
|
|
"completions/mean_terminated_length": 171.48790588378907,
|
|
"completions/min_length": 52.0,
|
|
"completions/min_terminated_length": 86.4,
|
|
"epoch": 0.56,
|
|
"grad_norm": 0.0008401142549701035,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 588049918.0,
|
|
"reward": 0.9541451930999756,
|
|
"reward_std": 0.07544240057468414,
|
|
"rewards/accuracy_reward": 0.5296875,
|
|
"rewards/brier_reward": 0.8010993957519531,
|
|
"rewards/confidence_uniqueness_reward": 0.9509715795516968,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_coverage_0": 0.14851057529449463,
|
|
"rewards/frontier_coverage_1": 0.14851057529449463,
|
|
"rewards/frontier_coverage_10": 0.14851057529449463,
|
|
"rewards/frontier_coverage_15": 0.14851057529449463,
|
|
"rewards/frontier_coverage_20": 0.14732412695884706,
|
|
"rewards/frontier_coverage_25": 0.10257082134485244,
|
|
"rewards/frontier_coverage_5": 0.14851057529449463,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0928955078125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12647614181041716,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04644775390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04644775390625,
|
|
"signal/advantage_abs_mean": 0.05650952383875847,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05650952383875847,
|
|
"signal/advantage_pre_scale_std": 0.10081221014261246,
|
|
"signal/advantage_std": 0.10081221014261246,
|
|
"signal/brier_reward/centered_abs_mean": 0.14023579359054567,
|
|
"signal/brier_reward/group_std_mean": 0.1808777332305908,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014023579470813275,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014023579470813275,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022420838847756384,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.028805967792868613,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022420838475227358,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022420838475227358,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1793442577123642,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23208971619606017,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025646228808909655,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025646228808909655,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1793442577123642,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23208971619606017,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025646228808909655,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025646228808909655,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1793442577123642,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23208971619606017,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025646228808909655,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025646228808909655,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1793442577123642,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23208971619606017,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025646228808909655,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025646228808909655,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17742233574390412,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22966001629829408,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025371393654495477,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025371393654495477,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11494539082050323,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14966228008270263,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016437190817669034,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016437190817669034,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1793442577123642,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23208971619606017,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025646228808909655,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025646228808909655,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.32387850371371785,
|
|
"calibration/batch_distribution_entropy": 0.930447230073459,
|
|
"calibration/buffer_distribution_entropy": 0.9651309621388965,
|
|
"calibration/confidence_entropy": 0.4303574408490675,
|
|
"calibration/coverage@0%": 0.02265625,
|
|
"calibration/coverage@1%": 0.02265625,
|
|
"calibration/coverage@10%": 0.08438264432485323,
|
|
"calibration/coverage@15%": 0.2152420193248532,
|
|
"calibration/coverage@20%": 0.2941482693248532,
|
|
"calibration/coverage@25%": 0.3402420193248532,
|
|
"calibration/coverage@30%": 0.40821459148727984,
|
|
"calibration/coverage@5%": 0.040234375,
|
|
"calibration/ece": 0.10406415746330164,
|
|
"calibration/mean_confidence": 0.5168637963576618,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00068359375,
|
|
"completions/max_length": 452.2,
|
|
"completions/max_terminated_length": 452.2,
|
|
"completions/mean_length": 171.79189453125,
|
|
"completions/mean_terminated_length": 171.90999450683594,
|
|
"completions/min_length": 18.2,
|
|
"completions/min_terminated_length": 88.2,
|
|
"epoch": 0.576,
|
|
"grad_norm": 0.0009653819724917412,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 604995691.0,
|
|
"reward": 0.943269693851471,
|
|
"reward_std": 0.06973677352070809,
|
|
"rewards/accuracy_reward": 0.515234375,
|
|
"rewards/brier_reward": 0.7794225335121154,
|
|
"rewards/confidence_uniqueness_reward": 0.9458268165588379,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_coverage_0": 0.14076022952795028,
|
|
"rewards/frontier_coverage_1": 0.14076022952795028,
|
|
"rewards/frontier_coverage_10": 0.14076022952795028,
|
|
"rewards/frontier_coverage_15": 0.14076022952795028,
|
|
"rewards/frontier_coverage_20": 0.14067162126302718,
|
|
"rewards/frontier_coverage_25": 0.09744109660387039,
|
|
"rewards/frontier_coverage_5": 0.14076022952795028,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.080224609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11059063673019409,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0401123046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0401123046875,
|
|
"signal/advantage_abs_mean": 0.0513992503285408,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0513992503285408,
|
|
"signal/advantage_pre_scale_std": 0.09461777806282043,
|
|
"signal/advantage_std": 0.09461777806282043,
|
|
"signal/brier_reward/centered_abs_mean": 0.13932594060897827,
|
|
"signal/brier_reward/group_std_mean": 0.17819225192070007,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0139325937256217,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0139325937256217,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026319159567356108,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.034161582589149475,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002631915872916579,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002631915872916579,
|
|
"signal/format_reward/centered_abs_mean": 0.001300048828125,
|
|
"signal/format_reward/group_std_mean": 0.0031943732406944036,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006500244140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006500244140625,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1736948162317276,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22478666603565217,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002483835769817233,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002483835769817233,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1736948162317276,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22478666603565217,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002483835769817233,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002483835769817233,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1736948162317276,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22478666603565217,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002483835769817233,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002483835769817233,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1736948162317276,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22478666603565217,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002483835769817233,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002483835769817233,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17225461602210998,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22295468747615815,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002463240968063474,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002463240968063474,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11142444163560868,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14512277245521546,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001593369501642883,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001593369501642883,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1736948162317276,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22478666603565217,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002483835769817233,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002483835769817233,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2917369911897618,
|
|
"calibration/batch_distribution_entropy": 0.9291020216354537,
|
|
"calibration/buffer_distribution_entropy": 0.9625577310801576,
|
|
"calibration/confidence_entropy": 0.4217634313370632,
|
|
"calibration/coverage@0%": 0.02306827910958904,
|
|
"calibration/coverage@1%": 0.02306827910958904,
|
|
"calibration/coverage@10%": 0.21228825220156552,
|
|
"calibration/coverage@15%": 0.31037105552837574,
|
|
"calibration/coverage@20%": 0.4471219116927593,
|
|
"calibration/coverage@25%": 0.5424604023972603,
|
|
"calibration/coverage@30%": 0.6170950036692759,
|
|
"calibration/coverage@5%": 0.06802608243639921,
|
|
"calibration/ece": 0.13262487285036945,
|
|
"calibration/mean_confidence": 0.49193563628804593,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 441.0,
|
|
"completions/max_terminated_length": 441.0,
|
|
"completions/mean_length": 171.99794921875,
|
|
"completions/mean_terminated_length": 172.03227844238282,
|
|
"completions/min_length": 69.0,
|
|
"completions/min_terminated_length": 83.6,
|
|
"epoch": 0.592,
|
|
"grad_norm": 0.0010885463561862707,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 621924662.0,
|
|
"reward": 0.9475321412086487,
|
|
"reward_std": 0.07133645117282868,
|
|
"rewards/accuracy_reward": 0.52119140625,
|
|
"rewards/brier_reward": 0.7884288311004639,
|
|
"rewards/confidence_uniqueness_reward": 0.9417649745941162,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_coverage_0": 0.14728878438472748,
|
|
"rewards/frontier_coverage_1": 0.14728878438472748,
|
|
"rewards/frontier_coverage_10": 0.14728878438472748,
|
|
"rewards/frontier_coverage_15": 0.14728878438472748,
|
|
"rewards/frontier_coverage_20": 0.14530769288539885,
|
|
"rewards/frontier_coverage_25": 0.09829618036746979,
|
|
"rewards/frontier_coverage_5": 0.14728878438472748,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.097454833984375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12659366130828859,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.64375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0487274169921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0487274169921875,
|
|
"signal/advantage_abs_mean": 0.0545043371617794,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0545043371617794,
|
|
"signal/advantage_pre_scale_std": 0.09718780517578125,
|
|
"signal/advantage_std": 0.09718780517578125,
|
|
"signal/brier_reward/centered_abs_mean": 0.13488493859767914,
|
|
"signal/brier_reward/group_std_mean": 0.17273030877113343,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013488493859767914,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013488493859767914,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029794788360595702,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03787661641836167,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029794787988066673,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029794787988066673,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18529422879219054,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23799728453159333,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026497074868530035,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026497074868530035,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18529422879219054,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23799728453159333,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026497074868530035,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026497074868530035,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18529422879219054,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23799728453159333,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026497074868530035,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026497074868530035,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18529422879219054,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23799728453159333,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026497074868530035,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026497074868530035,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1818026602268219,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2335704207420349,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002599778026342392,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002599778026342392,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11709080636501312,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1516294687986374,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016743984539061784,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016743984539061784,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18529422879219054,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23799728453159333,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026497074868530035,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026497074868530035,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2309774073824331,
|
|
"calibration/batch_distribution_entropy": 0.9009881297477383,
|
|
"calibration/buffer_distribution_entropy": 0.9599412902555885,
|
|
"calibration/confidence_entropy": 0.3998726480587709,
|
|
"calibration/coverage@0%": 0.04887867647058823,
|
|
"calibration/coverage@1%": 0.04887867647058823,
|
|
"calibration/coverage@10%": 0.2439292279411765,
|
|
"calibration/coverage@15%": 0.363890931372549,
|
|
"calibration/coverage@20%": 0.49204044117647056,
|
|
"calibration/coverage@25%": 0.5959742647058823,
|
|
"calibration/coverage@30%": 0.6948376225490196,
|
|
"calibration/coverage@5%": 0.1371813725490196,
|
|
"calibration/ece": 0.1267814975727107,
|
|
"calibration/mean_confidence": 0.45705972449120064,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 444.0,
|
|
"completions/max_terminated_length": 444.0,
|
|
"completions/mean_length": 172.74423828125,
|
|
"completions/mean_terminated_length": 172.8115478515625,
|
|
"completions/min_length": 54.2,
|
|
"completions/min_terminated_length": 88.0,
|
|
"epoch": 0.608,
|
|
"grad_norm": 0.0008090813644230366,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 638693051.0,
|
|
"reward": 0.9527804374694824,
|
|
"reward_std": 0.06479336544871331,
|
|
"rewards/accuracy_reward": 0.52177734375,
|
|
"rewards/brier_reward": 0.812950336933136,
|
|
"rewards/confidence_uniqueness_reward": 0.9374483942985534,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_coverage_0": 0.18071360886096954,
|
|
"rewards/frontier_coverage_1": 0.18071360886096954,
|
|
"rewards/frontier_coverage_10": 0.18071360886096954,
|
|
"rewards/frontier_coverage_15": 0.18071360886096954,
|
|
"rewards/frontier_coverage_20": 0.17192818522453307,
|
|
"rewards/frontier_coverage_25": 0.12003123611211777,
|
|
"rewards/frontier_coverage_5": 0.18071360886096954,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.089129638671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.11902370899915696,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0445648193359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0445648193359375,
|
|
"signal/advantage_abs_mean": 0.04798509031534195,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04798509031534195,
|
|
"signal/advantage_pre_scale_std": 0.08976030051708221,
|
|
"signal/advantage_std": 0.08976030051708221,
|
|
"signal/brier_reward/centered_abs_mean": 0.12546005249023437,
|
|
"signal/brier_reward/group_std_mean": 0.16316278874874116,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01254600528627634,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01254600528627634,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03275583237409592,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04150558784604073,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003275583265349269,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003275583265349269,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.002762135770171881,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18598188161849977,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23882973790168763,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026595407631248235,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026595407631248235,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18598188161849977,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23882973790168763,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026595407631248235,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026595407631248235,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18598188161849977,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23882973790168763,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026595407631248235,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026595407631248235,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18598188161849977,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23882973790168763,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026595407631248235,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026595407631248235,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1740649312734604,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22380025088787078,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024891285225749015,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024891285225749015,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11340802162885666,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14648526012897492,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016217347467318178,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016217347467318178,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18598188161849977,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23882973790168763,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026595407631248235,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026595407631248235,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24050447695605665,
|
|
"calibration/batch_distribution_entropy": 0.940744980652436,
|
|
"calibration/buffer_distribution_entropy": 0.9571343071662861,
|
|
"calibration/confidence_entropy": 0.4350306108006845,
|
|
"calibration/coverage@0%": 0.0171875,
|
|
"calibration/coverage@1%": 0.0171875,
|
|
"calibration/coverage@10%": 0.1765625,
|
|
"calibration/coverage@15%": 0.278515625,
|
|
"calibration/coverage@20%": 0.48203125,
|
|
"calibration/coverage@25%": 0.57890625,
|
|
"calibration/coverage@30%": 0.659375,
|
|
"calibration/coverage@5%": 0.05078125,
|
|
"calibration/ece": 0.11090657006311821,
|
|
"calibration/mean_confidence": 0.5068443759745327,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 404.4,
|
|
"completions/max_terminated_length": 404.4,
|
|
"completions/mean_length": 174.5966796875,
|
|
"completions/mean_terminated_length": 174.6303924560547,
|
|
"completions/min_length": 67.8,
|
|
"completions/min_terminated_length": 84.8,
|
|
"epoch": 0.624,
|
|
"grad_norm": 0.0013040411286056042,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0003,
|
|
"num_tokens": 655824825.0,
|
|
"reward": 0.9567921876907348,
|
|
"reward_std": 0.07369585633277893,
|
|
"rewards/accuracy_reward": 0.534375,
|
|
"rewards/brier_reward": 0.8031170845031739,
|
|
"rewards/confidence_uniqueness_reward": 0.9507421255111694,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_coverage_0": 0.15281389355659486,
|
|
"rewards/frontier_coverage_1": 0.15281389355659486,
|
|
"rewards/frontier_coverage_10": 0.15281389355659486,
|
|
"rewards/frontier_coverage_15": 0.15281389355659486,
|
|
"rewards/frontier_coverage_20": 0.14267186522483827,
|
|
"rewards/frontier_coverage_25": 0.09782160967588424,
|
|
"rewards/frontier_coverage_5": 0.15281389355659486,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.099169921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12814173698425294,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0495849609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0495849609375,
|
|
"signal/advantage_abs_mean": 0.05669146254658699,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05669146254658699,
|
|
"signal/advantage_pre_scale_std": 0.10152237415313721,
|
|
"signal/advantage_std": 0.10152237415313721,
|
|
"signal/brier_reward/centered_abs_mean": 0.13695546239614487,
|
|
"signal/brier_reward/group_std_mean": 0.1752742975950241,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013695546798408032,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013695546798408032,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023070438578724862,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02995435558259487,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002307043923065066,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002307043923065066,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18192780315876006,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23622408807277678,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002601567655801773,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002601567655801773,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18192780315876006,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23622408807277678,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002601567655801773,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002601567655801773,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18192780315876006,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23622408807277678,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002601567655801773,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002601567655801773,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18192780315876006,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23622408807277678,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002601567655801773,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002601567655801773,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16533594131469725,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2150631368160248,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00236430405639112,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00236430405639112,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10322353839874268,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1351427912712097,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014760966412723064,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014760966412723064,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18192780315876006,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23622408807277678,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002601567655801773,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002601567655801773,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2662952529255084,
|
|
"calibration/batch_distribution_entropy": 0.9377166997975432,
|
|
"calibration/buffer_distribution_entropy": 0.9553364115728316,
|
|
"calibration/confidence_entropy": 0.441204286731301,
|
|
"calibration/coverage@0%": 0.04414138943248532,
|
|
"calibration/coverage@1%": 0.09422547700587083,
|
|
"calibration/coverage@10%": 0.3096624266144814,
|
|
"calibration/coverage@15%": 0.34836029231898236,
|
|
"calibration/coverage@20%": 0.4347167013209393,
|
|
"calibration/coverage@25%": 0.515987340998043,
|
|
"calibration/coverage@30%": 0.60390625,
|
|
"calibration/coverage@5%": 0.19084056996086105,
|
|
"calibration/ece": 0.16710317600982427,
|
|
"calibration/mean_confidence": 0.5716693336470889,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00087890625,
|
|
"completions/max_length": 448.2,
|
|
"completions/max_terminated_length": 448.2,
|
|
"completions/mean_length": 179.13974609375,
|
|
"completions/mean_terminated_length": 179.2987823486328,
|
|
"completions/min_length": 56.0,
|
|
"completions/min_terminated_length": 91.0,
|
|
"epoch": 0.64,
|
|
"grad_norm": 0.0009510635281912982,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0003,
|
|
"num_tokens": 673001904.0,
|
|
"reward": 0.968864917755127,
|
|
"reward_std": 0.06678919866681099,
|
|
"rewards/accuracy_reward": 0.5662109375,
|
|
"rewards/brier_reward": 0.7999752283096313,
|
|
"rewards/confidence_uniqueness_reward": 0.9497535705566407,
|
|
"rewards/format_reward": 0.99912109375,
|
|
"rewards/frontier_coverage_0": 0.12023089975118637,
|
|
"rewards/frontier_coverage_1": 0.12023089975118637,
|
|
"rewards/frontier_coverage_10": 0.12023089975118637,
|
|
"rewards/frontier_coverage_15": 0.12023089975118637,
|
|
"rewards/frontier_coverage_20": 0.10779803842306138,
|
|
"rewards/frontier_coverage_25": 0.07608477771282196,
|
|
"rewards/frontier_coverage_5": 0.12023089975118637,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0839599609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1103439062833786,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04197998046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04197998046875,
|
|
"signal/advantage_abs_mean": 0.05087776109576225,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05087776109576225,
|
|
"signal/advantage_pre_scale_std": 0.09735682904720307,
|
|
"signal/advantage_std": 0.09735682904720307,
|
|
"signal/brier_reward/centered_abs_mean": 0.12388549447059631,
|
|
"signal/brier_reward/group_std_mean": 0.15970089435577392,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012388549372553825,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012388549372553825,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02408561371266842,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.030729348585009576,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024085613898932935,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024085613898932935,
|
|
"signal/format_reward/centered_abs_mean": 0.001446533203125,
|
|
"signal/format_reward/group_std_mean": 0.0024173962883651257,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007232666015625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007232666015625,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1510331243276596,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1984680712223053,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002159773651510477,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002159773651510477,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1510331243276596,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1984680712223053,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002159773651510477,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002159773651510477,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1510331243276596,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1984680712223053,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002159773651510477,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002159773651510477,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1510331243276596,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1984680712223053,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002159773651510477,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002159773651510477,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13272771388292312,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.17479420006275176,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018980062566697597,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018980062566697597,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08362834304571151,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11021229475736619,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011958853341639043,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011958853341639043,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1510331243276596,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1984680712223053,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002159773651510477,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002159773651510477,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"eval_calibration/aurc": 0.46054927486329866,
|
|
"eval_calibration/batch_distribution_entropy": 0.8455805403689154,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9548862717030364,
|
|
"eval_calibration/confidence_entropy": 0.38920046266411673,
|
|
"eval_calibration/coverage@0%": 0.046875,
|
|
"eval_calibration/coverage@1%": 0.046875,
|
|
"eval_calibration/coverage@10%": 0.046875,
|
|
"eval_calibration/coverage@15%": 0.046875,
|
|
"eval_calibration/coverage@20%": 0.140625,
|
|
"eval_calibration/coverage@25%": 0.28125,
|
|
"eval_calibration/coverage@30%": 0.296875,
|
|
"eval_calibration/coverage@5%": 0.046875,
|
|
"eval_calibration/ece": 0.21491073393227433,
|
|
"eval_calibration/mean_confidence": 0.48616073393227427,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 359.0,
|
|
"eval_completions/max_terminated_length": 359.0,
|
|
"eval_completions/mean_length": 182.6583366394043,
|
|
"eval_completions/mean_terminated_length": 182.6583366394043,
|
|
"eval_completions/min_length": 94.25,
|
|
"eval_completions/min_terminated_length": 94.25,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 673001904.0,
|
|
"eval_reward": 0.902785137295723,
|
|
"eval_reward_std": 0.2266659364104271,
|
|
"eval_rewards/accuracy_reward": 0.431640625,
|
|
"eval_rewards/brier_reward": 0.7861361354589462,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.893310546875,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_coverage_0": 0.2071906253695488,
|
|
"eval_rewards/frontier_coverage_1": 0.2071906253695488,
|
|
"eval_rewards/frontier_coverage_10": 0.2071906253695488,
|
|
"eval_rewards/frontier_coverage_15": 0.2071906253695488,
|
|
"eval_rewards/frontier_coverage_20": 0.18089748173952103,
|
|
"eval_rewards/frontier_coverage_25": 0.11322920396924019,
|
|
"eval_rewards/frontier_coverage_5": 0.2071906253695488,
|
|
"eval_runtime": 18.9985,
|
|
"eval_samples_per_second": 26.318,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4720458984375,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49339816719293594,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23602294921875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23602294921875,
|
|
"eval_signal/advantage_abs_mean": 0.21008000895380974,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21008000895380974,
|
|
"eval_signal/advantage_pre_scale_std": 0.22413352876901627,
|
|
"eval_signal/advantage_std": 0.22413352876901627,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2254427894949913,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2808932363986969,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022544278763234615,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.022544278763234615,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.042633056640625,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05089193116873503,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004263305920176208,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004263305920176208,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.37774093449115753,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.46647125482559204,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005401695379987359,
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005401695379987359,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.37774093449115753,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.46647125482559204,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005401695379987359,
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005401695379987359,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.37774093449115753,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.46647125482559204,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005401695379987359,
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005401695379987359,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.37774093449115753,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.46647125482559204,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005401695379987359,
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005401695379987359,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3283703997731209,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.40697097033262253,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00469569640699774,
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00469569640699774,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.19348600879311562,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.24605557322502136,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027668499387800694,
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027668499387800694,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.37774093449115753,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.46647125482559204,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005401695379987359,
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005401695379987359,
|
|
"eval_steps_per_second": 0.211,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4363457773568281,
|
|
"calibration/batch_distribution_entropy": 0.9584845881514225,
|
|
"calibration/buffer_distribution_entropy": 0.954657899533002,
|
|
"calibration/confidence_entropy": 0.45774466342454057,
|
|
"calibration/coverage@0%": 0.00078125,
|
|
"calibration/coverage@1%": 0.00078125,
|
|
"calibration/coverage@10%": 0.00078125,
|
|
"calibration/coverage@15%": 0.00078125,
|
|
"calibration/coverage@20%": 0.00078125,
|
|
"calibration/coverage@25%": 0.0390625,
|
|
"calibration/coverage@30%": 0.19866071428571427,
|
|
"calibration/coverage@5%": 0.00078125,
|
|
"calibration/ece": 0.15967113039820166,
|
|
"calibration/mean_confidence": 0.5140276042389598,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 408.8,
|
|
"completions/max_terminated_length": 408.8,
|
|
"completions/mean_length": 181.09658203125,
|
|
"completions/mean_terminated_length": 181.09658203125,
|
|
"completions/min_length": 81.8,
|
|
"completions/min_terminated_length": 81.8,
|
|
"epoch": 0.656,
|
|
"grad_norm": 0.0008898309315554798,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0,
|
|
"num_tokens": 689712877.0,
|
|
"reward": 0.9377357721328735,
|
|
"reward_std": 0.07557832449674606,
|
|
"rewards/accuracy_reward": 0.50634765625,
|
|
"rewards/brier_reward": 0.772177231311798,
|
|
"rewards/confidence_uniqueness_reward": 0.9539490699768066,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_coverage_0": 0.1302212730050087,
|
|
"rewards/frontier_coverage_1": 0.1302212730050087,
|
|
"rewards/frontier_coverage_10": 0.1302212730050087,
|
|
"rewards/frontier_coverage_15": 0.1302212730050087,
|
|
"rewards/frontier_coverage_20": 0.11557191163301468,
|
|
"rewards/frontier_coverage_25": 0.0757653221487999,
|
|
"rewards/frontier_coverage_5": 0.1302212730050087,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.095648193359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12803646177053452,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0478240966796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0478240966796875,
|
|
"signal/advantage_abs_mean": 0.05762340724468231,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05762340724468231,
|
|
"signal/advantage_pre_scale_std": 0.10331527590751648,
|
|
"signal/advantage_std": 0.10331527590751648,
|
|
"signal/brier_reward/centered_abs_mean": 0.14113092422485352,
|
|
"signal/brier_reward/group_std_mean": 0.180476638674736,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014113092422485351,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014113092422485351,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02103535197675228,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.027137762680649758,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002103535202331841,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002103535202331841,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.172061425447464,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22395667135715486,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024604784324765206,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024604784324765206,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.172061425447464,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22395667135715486,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024604784324765206,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024604784324765206,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.172061425447464,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22395667135715486,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024604784324765206,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024604784324765206,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.172061425447464,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22395667135715486,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024604784324765206,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024604784324765206,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1527923047542572,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19921277165412904,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021849300246685743,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021849300246685743,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09410947412252427,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12340447306632996,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013457655208185316,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013457655208185316,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.172061425447464,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22395667135715486,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024604784324765206,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024604784324765206,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29616188101698054,
|
|
"calibration/batch_distribution_entropy": 0.9400251706705747,
|
|
"calibration/buffer_distribution_entropy": 0.9554609832485716,
|
|
"calibration/confidence_entropy": 0.43868229938830494,
|
|
"calibration/coverage@0%": 0.00859375,
|
|
"calibration/coverage@1%": 0.00859375,
|
|
"calibration/coverage@10%": 0.088671875,
|
|
"calibration/coverage@15%": 0.175,
|
|
"calibration/coverage@20%": 0.25703125,
|
|
"calibration/coverage@25%": 0.373046875,
|
|
"calibration/coverage@30%": 0.45546875,
|
|
"calibration/coverage@5%": 0.069921875,
|
|
"calibration/ece": 0.1463105905099502,
|
|
"calibration/mean_confidence": 0.5178573725467299,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 481.4,
|
|
"completions/max_terminated_length": 481.4,
|
|
"completions/mean_length": 181.1900390625,
|
|
"completions/mean_terminated_length": 181.22657470703126,
|
|
"completions/min_length": 69.4,
|
|
"completions/min_terminated_length": 87.8,
|
|
"epoch": 0.672,
|
|
"grad_norm": 0.0008848632569424808,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 706481703.0,
|
|
"reward": 0.9508843898773194,
|
|
"reward_std": 0.06910986453294754,
|
|
"rewards/accuracy_reward": 0.52646484375,
|
|
"rewards/brier_reward": 0.7914854645729065,
|
|
"rewards/confidence_uniqueness_reward": 0.9468018293380738,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_coverage_0": 0.14879233986139298,
|
|
"rewards/frontier_coverage_1": 0.14879233986139298,
|
|
"rewards/frontier_coverage_10": 0.14879233986139298,
|
|
"rewards/frontier_coverage_15": 0.14879233986139298,
|
|
"rewards/frontier_coverage_20": 0.13711362332105637,
|
|
"rewards/frontier_coverage_25": 0.0924137145280838,
|
|
"rewards/frontier_coverage_5": 0.14879233986139298,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.099713134765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.12950885742902757,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0498565673828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0498565673828125,
|
|
"signal/advantage_abs_mean": 0.052921504527330396,
|
|
"signal/advantage_pre_scale_abs_mean": 0.052921504527330396,
|
|
"signal/advantage_pre_scale_std": 0.09709036946296692,
|
|
"signal/advantage_std": 0.09709036946296692,
|
|
"signal/brier_reward/centered_abs_mean": 0.1296452760696411,
|
|
"signal/brier_reward/group_std_mean": 0.16551610827445984,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012964527495205403,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012964527495205403,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0252695769071579,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03185669630765915,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002526957681402564,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002526957681402564,
|
|
"signal/format_reward/centered_abs_mean": 0.0003662109375,
|
|
"signal/format_reward/group_std_mean": 0.000768545875325799,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00018310546875,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1746266096830368,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22620674967765808,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024971604347229005,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024971604347229005,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1746266096830368,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22620674967765808,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024971604347229005,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024971604347229005,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1746266096830368,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22620674967765808,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024971604347229005,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024971604347229005,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1746266096830368,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22620674967765808,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024971604347229005,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024971604347229005,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15180024206638337,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19782575070858002,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021707434207201004,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021707434207201004,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09401053637266159,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12310894876718521,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00134435067884624,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00134435067884624,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1746266096830368,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22620674967765808,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024971604347229005,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024971604347229005,
|
|
"step": 210
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3350253442444463,
|
|
"calibration/batch_distribution_entropy": 0.9553660352834303,
|
|
"calibration/buffer_distribution_entropy": 0.9556529548688049,
|
|
"calibration/confidence_entropy": 0.45873064609980724,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.14375,
|
|
"calibration/coverage@15%": 0.23359375,
|
|
"calibration/coverage@20%": 0.33203125,
|
|
"calibration/coverage@25%": 0.490234375,
|
|
"calibration/coverage@30%": 0.576953125,
|
|
"calibration/coverage@5%": 0.034375,
|
|
"calibration/ece": 0.14170870449303774,
|
|
"calibration/mean_confidence": 0.4800435907009698,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 439.2,
|
|
"completions/max_terminated_length": 439.2,
|
|
"completions/mean_length": 185.12431640625,
|
|
"completions/mean_terminated_length": 185.16006469726562,
|
|
"completions/min_length": 52.6,
|
|
"completions/min_terminated_length": 89.6,
|
|
"epoch": 0.688,
|
|
"grad_norm": 0.0012188840191811323,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 723331296.0,
|
|
"reward": 0.9551563620567322,
|
|
"reward_std": 0.06885315775871277,
|
|
"rewards/accuracy_reward": 0.5369140625,
|
|
"rewards/brier_reward": 0.7924714326858521,
|
|
"rewards/confidence_uniqueness_reward": 0.9522701382637024,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_coverage_0": 0.13426284790039061,
|
|
"rewards/frontier_coverage_1": 0.13426284790039061,
|
|
"rewards/frontier_coverage_10": 0.13426284790039061,
|
|
"rewards/frontier_coverage_15": 0.13426284790039061,
|
|
"rewards/frontier_coverage_20": 0.11316078677773475,
|
|
"rewards/frontier_coverage_25": 0.07726155370473861,
|
|
"rewards/frontier_coverage_5": 0.13426284790039061,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0993408203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.13218309581279755,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04967041015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04967041015625,
|
|
"signal/advantage_abs_mean": 0.05133445784449577,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05133445784449577,
|
|
"signal/advantage_pre_scale_std": 0.09505542218685151,
|
|
"signal/advantage_std": 0.09505542218685151,
|
|
"signal/brier_reward/centered_abs_mean": 0.1251007065176964,
|
|
"signal/brier_reward/group_std_mean": 0.16029010117053985,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012510071508586407,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012510071508586407,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020702214539051057,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.026370349898934366,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00207022144459188,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00207022144459188,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17874427139759064,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22735729515552522,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002556043164804578,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002556043164804578,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17874427139759064,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22735729515552522,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002556043164804578,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002556043164804578,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17874427139759064,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22735729515552522,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002556043164804578,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002556043164804578,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17874427139759064,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22735729515552522,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002556043164804578,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002556043164804578,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1514152020215988,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19314254224300384,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021652374416589737,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021652374416589737,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09458372592926026,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12103196978569031,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013525472953915597,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013525472953915597,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17874427139759064,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22735729515552522,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002556043164804578,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002556043164804578,
|
|
"step": 215
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2551421501062629,
|
|
"calibration/batch_distribution_entropy": 0.9429821506693088,
|
|
"calibration/buffer_distribution_entropy": 0.9568870103860352,
|
|
"calibration/confidence_entropy": 0.43942119152318354,
|
|
"calibration/coverage@0%": 0.019921875,
|
|
"calibration/coverage@1%": 0.019921875,
|
|
"calibration/coverage@10%": 0.0671875,
|
|
"calibration/coverage@15%": 0.20703125,
|
|
"calibration/coverage@20%": 0.42109375,
|
|
"calibration/coverage@25%": 0.541015625,
|
|
"calibration/coverage@30%": 0.638671875,
|
|
"calibration/coverage@5%": 0.02734375,
|
|
"calibration/ece": 0.10697865923649652,
|
|
"calibration/mean_confidence": 0.5212105144066516,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 484.6,
|
|
"completions/max_terminated_length": 484.6,
|
|
"completions/mean_length": 184.15478515625,
|
|
"completions/mean_terminated_length": 184.2274963378906,
|
|
"completions/min_length": 35.0,
|
|
"completions/min_terminated_length": 91.6,
|
|
"epoch": 0.704,
|
|
"grad_norm": 0.0008549767080694437,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0,
|
|
"num_tokens": 740083185.0,
|
|
"reward": 0.9608247399330139,
|
|
"reward_std": 0.06562578678131104,
|
|
"rewards/accuracy_reward": 0.54541015625,
|
|
"rewards/brier_reward": 0.8056188941001892,
|
|
"rewards/confidence_uniqueness_reward": 0.9541590809822083,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_coverage_0": 0.13397103250026704,
|
|
"rewards/frontier_coverage_1": 0.13397103250026704,
|
|
"rewards/frontier_coverage_10": 0.13397103250026704,
|
|
"rewards/frontier_coverage_15": 0.13397103250026704,
|
|
"rewards/frontier_coverage_20": 0.11571932882070542,
|
|
"rewards/frontier_coverage_25": 0.08058026283979416,
|
|
"rewards/frontier_coverage_5": 0.13397103250026704,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.083770751953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11368496417999267,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0418853759765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0418853759765625,
|
|
"signal/advantage_abs_mean": 0.04889403432607651,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04889403432607651,
|
|
"signal/advantage_pre_scale_std": 0.09222806245088577,
|
|
"signal/advantage_std": 0.09222806245088577,
|
|
"signal/brier_reward/centered_abs_mean": 0.12414066046476364,
|
|
"signal/brier_reward/group_std_mean": 0.15926886796951295,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01241406574845314,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01241406574845314,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020242217183113097,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.027059277519583702,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0020242216996848583,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020242216996848583,
|
|
"signal/format_reward/centered_abs_mean": 0.000933837890625,
|
|
"signal/format_reward/group_std_mean": 0.0024258273653686045,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16520517766475679,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21402345597743988,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002362434100359678,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002362434100359678,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16520517766475679,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21402345597743988,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002362434100359678,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002362434100359678,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16520517766475679,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21402345597743988,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002362434100359678,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002362434100359678,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16520517766475679,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21402345597743988,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002362434100359678,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002362434100359678,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13321104198694228,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1731552869081497,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019049178808927536,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019049178808927536,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08578807860612869,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11171852350234986,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012267695274204016,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012267695274204016,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16520517766475679,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21402345597743988,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002362434100359678,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002362434100359678,
|
|
"step": 220
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2324519263969714,
|
|
"calibration/batch_distribution_entropy": 0.9606665053848438,
|
|
"calibration/buffer_distribution_entropy": 0.9569371832498403,
|
|
"calibration/confidence_entropy": 0.4423637270520454,
|
|
"calibration/coverage@0%": 0.056640625,
|
|
"calibration/coverage@1%": 0.069921875,
|
|
"calibration/coverage@10%": 0.2640625,
|
|
"calibration/coverage@15%": 0.34140625,
|
|
"calibration/coverage@20%": 0.516015625,
|
|
"calibration/coverage@25%": 0.603515625,
|
|
"calibration/coverage@30%": 0.677734375,
|
|
"calibration/coverage@5%": 0.15,
|
|
"calibration/ece": 0.14048664549176487,
|
|
"calibration/mean_confidence": 0.5234643985083343,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 452.4,
|
|
"completions/max_terminated_length": 452.4,
|
|
"completions/mean_length": 185.41982421875,
|
|
"completions/mean_terminated_length": 185.41982421875,
|
|
"completions/min_length": 89.0,
|
|
"completions/min_terminated_length": 89.0,
|
|
"epoch": 0.72,
|
|
"grad_norm": 0.0010070810094475746,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 756991740.0,
|
|
"reward": 0.9701446890830994,
|
|
"reward_std": 0.06546642929315567,
|
|
"rewards/accuracy_reward": 0.56376953125,
|
|
"rewards/brier_reward": 0.8114647984504699,
|
|
"rewards/confidence_uniqueness_reward": 0.9570422768592834,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_coverage_0": 0.1252898707985878,
|
|
"rewards/frontier_coverage_1": 0.1252898707985878,
|
|
"rewards/frontier_coverage_10": 0.1252898707985878,
|
|
"rewards/frontier_coverage_15": 0.1252898707985878,
|
|
"rewards/frontier_coverage_20": 0.10252745747566223,
|
|
"rewards/frontier_coverage_25": 0.07228612750768662,
|
|
"rewards/frontier_coverage_5": 0.1252898707985878,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.085198974609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12007367312908172,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.634375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0425994873046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0425994873046875,
|
|
"signal/advantage_abs_mean": 0.04709207341074943,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04709207341074943,
|
|
"signal/advantage_pre_scale_std": 0.09191109389066696,
|
|
"signal/advantage_std": 0.09191109389066696,
|
|
"signal/brier_reward/centered_abs_mean": 0.11166439652442932,
|
|
"signal/brier_reward/group_std_mean": 0.14501525461673737,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011166440136730672,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011166440136730672,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.017501908540725707,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.022045810893177985,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0017501908587291837,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017501908587291837,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15232807993888856,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19803299903869628,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021782914409413934,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021782914409413934,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15232807993888856,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19803299903869628,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021782914409413934,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021782914409413934,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15232807993888856,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19803299903869628,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021782914409413934,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021782914409413934,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15232807993888856,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19803299903869628,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021782914409413934,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021782914409413934,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11411124169826507,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1488576978445053,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016317907487973572,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016317907487973572,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07262995690107346,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09404050707817077,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010386083857156336,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010386083857156336,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15232807993888856,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19803299903869628,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021782914409413934,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021782914409413934,
|
|
"step": 225
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2439529951333029,
|
|
"calibration/batch_distribution_entropy": 0.9487447159000004,
|
|
"calibration/buffer_distribution_entropy": 0.9573477855546649,
|
|
"calibration/confidence_entropy": 0.44479426104559405,
|
|
"calibration/coverage@0%": 0.006642918297455969,
|
|
"calibration/coverage@1%": 0.006642918297455969,
|
|
"calibration/coverage@10%": 0.14687729329745597,
|
|
"calibration/coverage@15%": 0.36093979329745596,
|
|
"calibration/coverage@20%": 0.47461166829745594,
|
|
"calibration/coverage@25%": 0.5570771159491195,
|
|
"calibration/coverage@30%": 0.6911218811154598,
|
|
"calibration/coverage@5%": 0.08086166829745597,
|
|
"calibration/ece": 0.13735173236558448,
|
|
"calibration/mean_confidence": 0.5550578967354733,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 566.0,
|
|
"completions/max_terminated_length": 566.0,
|
|
"completions/mean_length": 187.6255859375,
|
|
"completions/mean_terminated_length": 187.6255859375,
|
|
"completions/min_length": 88.8,
|
|
"completions/min_terminated_length": 88.8,
|
|
"epoch": 0.736,
|
|
"grad_norm": 0.0006303332047536969,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 773852610.0,
|
|
"reward": 0.9724397420883178,
|
|
"reward_std": 0.061435190588235856,
|
|
"rewards/accuracy_reward": 0.57197265625,
|
|
"rewards/brier_reward": 0.8023635387420655,
|
|
"rewards/confidence_uniqueness_reward": 0.9555821895599366,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_coverage_0": 0.11602756455540657,
|
|
"rewards/frontier_coverage_1": 0.11602756455540657,
|
|
"rewards/frontier_coverage_10": 0.11602756455540657,
|
|
"rewards/frontier_coverage_15": 0.11602756455540657,
|
|
"rewards/frontier_coverage_20": 0.09671139717102051,
|
|
"rewards/frontier_coverage_25": 0.07193926870822906,
|
|
"rewards/frontier_coverage_5": 0.11602756455540657,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.078399658203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.10787871330976487,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.675,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0391998291015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0391998291015625,
|
|
"signal/advantage_abs_mean": 0.04567759558558464,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04567759558558464,
|
|
"signal/advantage_pre_scale_std": 0.08717550188302994,
|
|
"signal/advantage_std": 0.08717550188302994,
|
|
"signal/brier_reward/centered_abs_mean": 0.11523260176181793,
|
|
"signal/brier_reward/group_std_mean": 0.14933998584747316,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011523259989917278,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011523259989917278,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.018767333030700682,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02405969724059105,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0018767332891002297,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018767332891002297,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15312386751174928,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19944342970848083,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002189671341329813,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002189671341329813,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15312386751174928,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19944342970848083,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002189671341329813,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002189671341329813,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15312386751174928,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19944342970848083,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002189671341329813,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002189671341329813,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15312386751174928,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19944342970848083,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002189671341329813,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002189671341329813,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11110765635967254,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1452748954296112,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015888395719230175,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015888395719230175,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07131384164094925,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09314024895429611,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010197879397310316,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010197879397310316,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15312386751174928,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19944342970848083,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002189671341329813,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002189671341329813,
|
|
"step": 230
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2604849656231073,
|
|
"calibration/batch_distribution_entropy": 0.9362567886264237,
|
|
"calibration/buffer_distribution_entropy": 0.9574063927247802,
|
|
"calibration/confidence_entropy": 0.4310544919287905,
|
|
"calibration/coverage@0%": 0.017578125,
|
|
"calibration/coverage@1%": 0.017578125,
|
|
"calibration/coverage@10%": 0.165625,
|
|
"calibration/coverage@15%": 0.347265625,
|
|
"calibration/coverage@20%": 0.441015625,
|
|
"calibration/coverage@25%": 0.523046875,
|
|
"calibration/coverage@30%": 0.62578125,
|
|
"calibration/coverage@5%": 0.0625,
|
|
"calibration/ece": 0.1265339089932299,
|
|
"calibration/mean_confidence": 0.4604680332885215,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 477.4,
|
|
"completions/max_terminated_length": 477.4,
|
|
"completions/mean_length": 189.84384765625,
|
|
"completions/mean_terminated_length": 189.86265258789064,
|
|
"completions/min_length": 72.4,
|
|
"completions/min_terminated_length": 92.2,
|
|
"epoch": 0.752,
|
|
"grad_norm": 0.000941340927965939,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 791023811.0,
|
|
"reward": 0.9691111087799072,
|
|
"reward_std": 0.06380453407764435,
|
|
"rewards/accuracy_reward": 0.5640625,
|
|
"rewards/brier_reward": 0.8048084497451782,
|
|
"rewards/confidence_uniqueness_reward": 0.9539137840270996,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_coverage_0": 0.12513699382543564,
|
|
"rewards/frontier_coverage_1": 0.12513699382543564,
|
|
"rewards/frontier_coverage_10": 0.12513699382543564,
|
|
"rewards/frontier_coverage_15": 0.12513699382543564,
|
|
"rewards/frontier_coverage_20": 0.09549697563052177,
|
|
"rewards/frontier_coverage_25": 0.06598303094506264,
|
|
"rewards/frontier_coverage_5": 0.12513699382543564,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08544921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1140655368566513,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042724609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.042724609375,
|
|
"signal/advantage_abs_mean": 0.04823839291930199,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04823839291930199,
|
|
"signal/advantage_pre_scale_std": 0.093255215883255,
|
|
"signal/advantage_std": 0.093255215883255,
|
|
"signal/brier_reward/centered_abs_mean": 0.10841264575719833,
|
|
"signal/brier_reward/group_std_mean": 0.14089754223823547,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010841264761984349,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010841264761984349,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019311527907848357,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02457045093178749,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0019311528420075774,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019311528420075774,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14460354149341584,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18843676149845123,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020678306696936487,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020678306696936487,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14460354149341584,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18843676149845123,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020678306696936487,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020678306696936487,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14460354149341584,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18843676149845123,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020678306696936487,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020678306696936487,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14460354149341584,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18843676149845123,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020678306696936487,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020678306696936487,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1025826632976532,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.13485993444919586,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014669320778921246,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014669320778921246,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0643330879509449,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08459075540304184,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009199631633237005,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009199631633237005,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14460354149341584,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18843676149845123,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020678306696936487,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020678306696936487,
|
|
"step": 235
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2686377140883339,
|
|
"calibration/batch_distribution_entropy": 0.9594533632566217,
|
|
"calibration/buffer_distribution_entropy": 0.9578625978207885,
|
|
"calibration/confidence_entropy": 0.4627788377016112,
|
|
"calibration/coverage@0%": 0.056640625,
|
|
"calibration/coverage@1%": 0.0703125,
|
|
"calibration/coverage@10%": 0.269140625,
|
|
"calibration/coverage@15%": 0.32890625,
|
|
"calibration/coverage@20%": 0.41996162548923677,
|
|
"calibration/coverage@25%": 0.47818539016634054,
|
|
"calibration/coverage@30%": 0.5680505442759295,
|
|
"calibration/coverage@5%": 0.17578125,
|
|
"calibration/ece": 0.18309338930519575,
|
|
"calibration/mean_confidence": 0.4839827718853355,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 520.0,
|
|
"completions/max_terminated_length": 520.0,
|
|
"completions/mean_length": 194.88076171875,
|
|
"completions/mean_terminated_length": 194.92012939453124,
|
|
"completions/min_length": 74.0,
|
|
"completions/min_terminated_length": 93.2,
|
|
"epoch": 0.768,
|
|
"grad_norm": 0.0009122573537752032,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 807952094.0,
|
|
"reward": 0.9469700813293457,
|
|
"reward_std": 0.06870835795998573,
|
|
"rewards/accuracy_reward": 0.51123046875,
|
|
"rewards/brier_reward": 0.8135611772537231,
|
|
"rewards/confidence_uniqueness_reward": 0.9532927393913269,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_coverage_0": 0.16543679535388947,
|
|
"rewards/frontier_coverage_1": 0.16543679535388947,
|
|
"rewards/frontier_coverage_10": 0.16543679535388947,
|
|
"rewards/frontier_coverage_15": 0.16529496312141417,
|
|
"rewards/frontier_coverage_20": 0.12598445862531663,
|
|
"rewards/frontier_coverage_25": 0.08305399417877198,
|
|
"rewards/frontier_coverage_5": 0.16543679535388947,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.091339111328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11980533748865127,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0456695556640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0456695556640625,
|
|
"signal/advantage_abs_mean": 0.052151027321815493,
|
|
"signal/advantage_pre_scale_abs_mean": 0.052151027321815493,
|
|
"signal/advantage_pre_scale_std": 0.0985235944390297,
|
|
"signal/advantage_std": 0.0985235944390297,
|
|
"signal/brier_reward/centered_abs_mean": 0.11318521946668625,
|
|
"signal/brier_reward/group_std_mean": 0.14390725791454315,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011318522319197655,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011318522319197655,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02046610079705715,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.026208149269223213,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0020466101123020053,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020466101123020053,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1562270164489746,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20009250938892365,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022340463940054177,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022340463940054177,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1562270164489746,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20009250938892365,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022340463940054177,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022340463940054177,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1562270164489746,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20009250938892365,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022340463940054177,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022340463940054177,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15608695149421692,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19990470111370087,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022320433985441924,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022320433985441924,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11054950356483459,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14203203320503235,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015808578580617904,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015808578580617904,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06979388296604157,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08927626758813859,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009980525122955442,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009980525122955442,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1562270164489746,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20009250938892365,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022340463940054177,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022340463940054177,
|
|
"step": 240
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3197772747706285,
|
|
"calibration/batch_distribution_entropy": 0.9217777615802009,
|
|
"calibration/buffer_distribution_entropy": 0.9572320168834499,
|
|
"calibration/confidence_entropy": 0.4192123665672717,
|
|
"calibration/coverage@0%": 0.049236331947162426,
|
|
"calibration/coverage@1%": 0.051189456947162426,
|
|
"calibration/coverage@10%": 0.20206778987279844,
|
|
"calibration/coverage@15%": 0.2517153864970646,
|
|
"calibration/coverage@20%": 0.2955112524461839,
|
|
"calibration/coverage@25%": 0.3643162915851272,
|
|
"calibration/coverage@30%": 0.4334989603718199,
|
|
"calibration/coverage@5%": 0.14654170743639922,
|
|
"calibration/ece": 0.16162157978207495,
|
|
"calibration/mean_confidence": 0.5331659593344009,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 517.2,
|
|
"completions/max_terminated_length": 517.2,
|
|
"completions/mean_length": 194.9912109375,
|
|
"completions/mean_terminated_length": 195.0108184814453,
|
|
"completions/min_length": 77.4,
|
|
"completions/min_terminated_length": 96.2,
|
|
"epoch": 0.784,
|
|
"grad_norm": 0.0007836997392587364,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 825123172.0,
|
|
"reward": 0.9639974594116211,
|
|
"reward_std": 0.06840100809931755,
|
|
"rewards/accuracy_reward": 0.56015625,
|
|
"rewards/brier_reward": 0.7868908882141114,
|
|
"rewards/confidence_uniqueness_reward": 0.9535213708877563,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_coverage_0": 0.11056532710790634,
|
|
"rewards/frontier_coverage_1": 0.11056532710790634,
|
|
"rewards/frontier_coverage_10": 0.11056532710790634,
|
|
"rewards/frontier_coverage_15": 0.1084191419184208,
|
|
"rewards/frontier_coverage_20": 0.08252720981836319,
|
|
"rewards/frontier_coverage_25": 0.06098323464393616,
|
|
"rewards/frontier_coverage_5": 0.11056532710790634,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0933349609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1282936602830887,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04666748046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04666748046875,
|
|
"signal/advantage_abs_mean": 0.05056538209319115,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05056538209319115,
|
|
"signal/advantage_pre_scale_std": 0.09555504471063614,
|
|
"signal/advantage_std": 0.09555504471063614,
|
|
"signal/brier_reward/centered_abs_mean": 0.11904580742120743,
|
|
"signal/brier_reward/group_std_mean": 0.15341890454292298,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011904580891132355,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011904580891132355,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019766898453235628,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.024970807135105133,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0019766898592934014,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019766898592934014,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15665509104728698,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20551926791667938,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022401677910238505,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022401677910238505,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15665509104728698,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20551926791667938,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022401677910238505,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022401677910238505,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15665509104728698,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20551926791667938,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022401677910238505,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022401677910238505,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1528875708580017,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20064776241779328,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002186292293481529,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002186292293481529,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10197529047727585,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.13459382504224776,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014582466334104537,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014582466334104537,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06756449341773987,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08749876469373703,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009661722113378346,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009661722113378346,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15665509104728698,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20551926791667938,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022401677910238505,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022401677910238505,
|
|
"step": 245
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20359943680556297,
|
|
"calibration/batch_distribution_entropy": 0.9231851021411351,
|
|
"calibration/buffer_distribution_entropy": 0.9563622314749581,
|
|
"calibration/confidence_entropy": 0.4116085176287439,
|
|
"calibration/coverage@0%": 0.015625,
|
|
"calibration/coverage@1%": 0.015625,
|
|
"calibration/coverage@10%": 0.262109375,
|
|
"calibration/coverage@15%": 0.471484375,
|
|
"calibration/coverage@20%": 0.588671875,
|
|
"calibration/coverage@25%": 0.662109375,
|
|
"calibration/coverage@30%": 0.752734375,
|
|
"calibration/coverage@5%": 0.07421875,
|
|
"calibration/ece": 0.09644673492421116,
|
|
"calibration/mean_confidence": 0.5133434148481435,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 451.8,
|
|
"completions/max_terminated_length": 451.8,
|
|
"completions/mean_length": 194.51513671875,
|
|
"completions/mean_terminated_length": 194.53388671875,
|
|
"completions/min_length": 78.4,
|
|
"completions/min_terminated_length": 98.0,
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.0010863860370591283,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 842125567.0,
|
|
"reward": 0.9804993987083435,
|
|
"reward_std": 0.06534842252731324,
|
|
"rewards/accuracy_reward": 0.58642578125,
|
|
"rewards/brier_reward": 0.8142348051071167,
|
|
"rewards/confidence_uniqueness_reward": 0.9505360841751098,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_coverage_0": 0.12197792679071426,
|
|
"rewards/frontier_coverage_1": 0.12197792679071426,
|
|
"rewards/frontier_coverage_10": 0.12197792679071426,
|
|
"rewards/frontier_coverage_15": 0.11846152395009994,
|
|
"rewards/frontier_coverage_20": 0.08628106266260147,
|
|
"rewards/frontier_coverage_25": 0.06666406691074371,
|
|
"rewards/frontier_coverage_5": 0.12197792679071426,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.089093017578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11905532628297806,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0445465087890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0445465087890625,
|
|
"signal/advantage_abs_mean": 0.04896164536476135,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04896164536476135,
|
|
"signal/advantage_pre_scale_std": 0.09614251106977463,
|
|
"signal/advantage_std": 0.09614251106977463,
|
|
"signal/brier_reward/centered_abs_mean": 0.10979892462491989,
|
|
"signal/brier_reward/group_std_mean": 0.14249781668186187,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010979892686009407,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010979892686009407,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022416341677308084,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.028693411871790885,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022416341584175826,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022416341584175826,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14336768090724944,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18752407133579255,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020501580554991962,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020501580554991962,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14336768090724944,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18752407133579255,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020501580554991962,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020501580554991962,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14336768090724944,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18752407133579255,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020501580554991962,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020501580554991962,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13639699816703796,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17871999144554138,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019504770869389176,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019504770869389176,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09066965878009796,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11953730136156082,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001296576135791838,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001296576135791838,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.060934024304151534,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07885423004627228,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008713565533980727,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008713565533980727,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14336768090724944,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18752407133579255,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020501580554991962,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020501580554991962,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"eval_calibration/aurc": 0.4583315773752878,
|
|
"eval_calibration/batch_distribution_entropy": 0.893029162438776,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9562239211237429,
|
|
"eval_calibration/confidence_entropy": 0.40999648241332165,
|
|
"eval_calibration/coverage@0%": 0.1015625,
|
|
"eval_calibration/coverage@1%": 0.1015625,
|
|
"eval_calibration/coverage@10%": 0.1015625,
|
|
"eval_calibration/coverage@15%": 0.1171875,
|
|
"eval_calibration/coverage@20%": 0.1484375,
|
|
"eval_calibration/coverage@25%": 0.1875,
|
|
"eval_calibration/coverage@30%": 0.1875,
|
|
"eval_calibration/coverage@5%": 0.1015625,
|
|
"eval_calibration/ece": 0.21749381230468748,
|
|
"eval_calibration/mean_confidence": 0.4812561876953125,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 341.5,
|
|
"eval_completions/max_terminated_length": 341.5,
|
|
"eval_completions/mean_length": 200.39931106567383,
|
|
"eval_completions/mean_terminated_length": 200.39931106567383,
|
|
"eval_completions/min_length": 113.5,
|
|
"eval_completions/min_terminated_length": 113.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 842125567.0,
|
|
"eval_reward": 0.9045292139053345,
|
|
"eval_reward_std": 0.23102111369371414,
|
|
"eval_rewards/accuracy_reward": 0.4375,
|
|
"eval_rewards/brier_reward": 0.7857947647571564,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.89599609375,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_coverage_0": 0.20588579028844833,
|
|
"eval_rewards/frontier_coverage_1": 0.20588579028844833,
|
|
"eval_rewards/frontier_coverage_10": 0.20588579028844833,
|
|
"eval_rewards/frontier_coverage_15": 0.1928391382098198,
|
|
"eval_rewards/frontier_coverage_20": 0.13208013586699963,
|
|
"eval_rewards/frontier_coverage_25": 0.08231428451836109,
|
|
"eval_rewards/frontier_coverage_5": 0.20588579028844833,
|
|
"eval_runtime": 18.2572,
|
|
"eval_samples_per_second": 27.387,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.478759765625,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49678919464349747,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2393798828125,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2393798828125,
|
|
"eval_signal/advantage_abs_mean": 0.21644001826643944,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21644001826643944,
|
|
"eval_signal/advantage_pre_scale_std": 0.22859064117074013,
|
|
"eval_signal/advantage_std": 0.22859064117074013,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.22941264510154724,
|
|
"eval_signal/brier_reward/group_std_mean": 0.28201356530189514,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022941263858228922,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.022941263858228922,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0435028076171875,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.051835235208272934,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004350280680228025,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004350280680228025,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.3821023553609848,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.4686368927359581,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005464063957333565,
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005464063957333565,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3821023553609848,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4686368927359581,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005464063957333565,
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005464063957333565,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3821023553609848,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4686368927359581,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005464063957333565,
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005464063957333565,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.35555680841207504,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.436874620616436,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005084462347440422,
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005084462347440422,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.23739305138587952,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.2956453561782837,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033947205520235,
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033947205520235,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.12688638269901276,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.16197730228304863,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018144752539228648,
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018144752539228648,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3821023553609848,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4686368927359581,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005464063957333565,
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005464063957333565,
|
|
"eval_steps_per_second": 0.219,
|
|
"step": 250
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23015374464443203,
|
|
"calibration/batch_distribution_entropy": 0.8576458054480749,
|
|
"calibration/buffer_distribution_entropy": 0.9553468512920373,
|
|
"calibration/confidence_entropy": 0.3716785624732463,
|
|
"calibration/coverage@0%": 0.004296875,
|
|
"calibration/coverage@1%": 0.046875,
|
|
"calibration/coverage@10%": 0.16953125,
|
|
"calibration/coverage@15%": 0.233203125,
|
|
"calibration/coverage@20%": 0.431640625,
|
|
"calibration/coverage@25%": 0.6375,
|
|
"calibration/coverage@30%": 0.790625,
|
|
"calibration/coverage@5%": 0.11796875,
|
|
"calibration/ece": 0.14930048014804023,
|
|
"calibration/mean_confidence": 0.5608904347646698,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 456.8,
|
|
"completions/max_terminated_length": 456.8,
|
|
"completions/mean_length": 194.08486328125,
|
|
"completions/mean_terminated_length": 194.10433349609374,
|
|
"completions/min_length": 76.8,
|
|
"completions/min_terminated_length": 96.2,
|
|
"epoch": 0.816,
|
|
"grad_norm": 0.0009816524107009172,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 859212164.0,
|
|
"reward": 0.9736658692359924,
|
|
"reward_std": 0.06884004175662994,
|
|
"rewards/accuracy_reward": 0.58291015625,
|
|
"rewards/brier_reward": 0.7866194605827331,
|
|
"rewards/confidence_uniqueness_reward": 0.9460584640502929,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_coverage_0": 0.0991329938173294,
|
|
"rewards/frontier_coverage_1": 0.0991329938173294,
|
|
"rewards/frontier_coverage_10": 0.0991329938173294,
|
|
"rewards/frontier_coverage_15": 0.0950236402451992,
|
|
"rewards/frontier_coverage_20": 0.07604901492595673,
|
|
"rewards/frontier_coverage_25": 0.061194049566984175,
|
|
"rewards/frontier_coverage_5": 0.0991329938173294,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.088104248046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1207100659608841,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0440521240234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0440521240234375,
|
|
"signal/advantage_abs_mean": 0.05134280025959015,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05134280025959015,
|
|
"signal/advantage_pre_scale_std": 0.09874342083930969,
|
|
"signal/advantage_std": 0.09874342083930969,
|
|
"signal/brier_reward/centered_abs_mean": 0.12759677469730377,
|
|
"signal/brier_reward/group_std_mean": 0.16383454203605652,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012759677693247795,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012759677693247795,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02629918046295643,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03357274830341339,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026299181394279004,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026299181394279004,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15177842378616332,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19879043400287627,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021704314742237328,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021704314742237328,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15177842378616332,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19879043400287627,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021704314742237328,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021704314742237328,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15177842378616332,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19879043400287627,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021704314742237328,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021704314742237328,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14039504528045654,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18412175476551057,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00200764921028167,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00200764921028167,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0951578825712204,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12507294863462448,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013607577420771122,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013607577420771122,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06570944413542748,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08424745202064514,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000939645036123693,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000939645036123693,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15177842378616332,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19879043400287627,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021704314742237328,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021704314742237328,
|
|
"step": 255
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28512608942683115,
|
|
"calibration/batch_distribution_entropy": 0.9006837544803672,
|
|
"calibration/buffer_distribution_entropy": 0.9535822255765938,
|
|
"calibration/confidence_entropy": 0.40338913422993167,
|
|
"calibration/coverage@0%": 0.02890625,
|
|
"calibration/coverage@1%": 0.02890625,
|
|
"calibration/coverage@10%": 0.21484375,
|
|
"calibration/coverage@15%": 0.26953125,
|
|
"calibration/coverage@20%": 0.33671875,
|
|
"calibration/coverage@25%": 0.402734375,
|
|
"calibration/coverage@30%": 0.544140625,
|
|
"calibration/coverage@5%": 0.1546875,
|
|
"calibration/ece": 0.13350636173462058,
|
|
"calibration/mean_confidence": 0.5006771214138114,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 439.2,
|
|
"completions/max_terminated_length": 439.2,
|
|
"completions/mean_length": 195.5736328125,
|
|
"completions/mean_terminated_length": 195.5927001953125,
|
|
"completions/min_length": 76.8,
|
|
"completions/min_terminated_length": 96.0,
|
|
"epoch": 0.832,
|
|
"grad_norm": 0.0008164091850630939,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 876223190.0,
|
|
"reward": 0.9648186922073364,
|
|
"reward_std": 0.0649384766817093,
|
|
"rewards/accuracy_reward": 0.553125,
|
|
"rewards/brier_reward": 0.8103304743766785,
|
|
"rewards/confidence_uniqueness_reward": 0.9441303372383117,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_coverage_0": 0.14813297837972642,
|
|
"rewards/frontier_coverage_1": 0.14813297837972642,
|
|
"rewards/frontier_coverage_10": 0.14813297837972642,
|
|
"rewards/frontier_coverage_15": 0.13433591276407242,
|
|
"rewards/frontier_coverage_20": 0.09811097532510757,
|
|
"rewards/frontier_coverage_25": 0.07424705252051353,
|
|
"rewards/frontier_coverage_5": 0.14813297837972642,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0893310546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12017730772495269,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04466552734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04466552734375,
|
|
"signal/advantage_abs_mean": 0.048449646681547165,
|
|
"signal/advantage_pre_scale_abs_mean": 0.048449646681547165,
|
|
"signal/advantage_pre_scale_std": 0.09641486257314683,
|
|
"signal/advantage_std": 0.09641486257314683,
|
|
"signal/brier_reward/centered_abs_mean": 0.11072713136672974,
|
|
"signal/brier_reward/group_std_mean": 0.1412588134407997,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011072713136672973,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011072713136672973,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02563716545701027,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03275141529738903,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025637165643274786,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025637165643274786,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14630222022533418,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.189518603682518,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002092121751047671,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002092121751047671,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14630222022533418,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.189518603682518,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002092121751047671,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002092121751047671,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14630222022533418,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.189518603682518,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002092121751047671,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002092121751047671,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1310984805226326,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16997389793395995,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018747082212939858,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018747082212939858,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09214921295642853,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11910515576601029,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013177337590605021,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013177337590605021,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.062527135014534,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07901622802019119,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008941379957832396,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008941379957832396,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14630222022533418,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.189518603682518,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002092121751047671,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002092121751047671,
|
|
"step": 260
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30311595878798575,
|
|
"calibration/batch_distribution_entropy": 0.9193359670756618,
|
|
"calibration/buffer_distribution_entropy": 0.9533215398340383,
|
|
"calibration/confidence_entropy": 0.4322883837207173,
|
|
"calibration/coverage@0%": 0.023046875,
|
|
"calibration/coverage@1%": 0.023046875,
|
|
"calibration/coverage@10%": 0.20078125,
|
|
"calibration/coverage@15%": 0.271484375,
|
|
"calibration/coverage@20%": 0.429296875,
|
|
"calibration/coverage@25%": 0.497265625,
|
|
"calibration/coverage@30%": 0.5578125,
|
|
"calibration/coverage@5%": 0.125,
|
|
"calibration/ece": 0.1747222471321523,
|
|
"calibration/mean_confidence": 0.5455925214912242,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 425.4,
|
|
"completions/max_terminated_length": 425.4,
|
|
"completions/mean_length": 195.658984375,
|
|
"completions/mean_terminated_length": 195.67786865234376,
|
|
"completions/min_length": 79.8,
|
|
"completions/min_terminated_length": 99.0,
|
|
"epoch": 0.848,
|
|
"grad_norm": 0.0009759237291291356,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 893241106.0,
|
|
"reward": 0.9537703156471252,
|
|
"reward_std": 0.06543072313070297,
|
|
"rewards/accuracy_reward": 0.53271484375,
|
|
"rewards/brier_reward": 0.7992340207099915,
|
|
"rewards/confidence_uniqueness_reward": 0.9499642372131347,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_coverage_0": 0.14569487571716308,
|
|
"rewards/frontier_coverage_1": 0.14569487571716308,
|
|
"rewards/frontier_coverage_10": 0.14569487571716308,
|
|
"rewards/frontier_coverage_15": 0.13636898696422578,
|
|
"rewards/frontier_coverage_20": 0.09246301501989365,
|
|
"rewards/frontier_coverage_25": 0.06544329449534417,
|
|
"rewards/frontier_coverage_5": 0.14569487571716308,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.082806396484375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11064963936805725,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0414031982421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0414031982421875,
|
|
"signal/advantage_abs_mean": 0.0498686358332634,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0498686358332634,
|
|
"signal/advantage_pre_scale_std": 0.09574073255062103,
|
|
"signal/advantage_std": 0.09574073255062103,
|
|
"signal/brier_reward/centered_abs_mean": 0.11890813261270523,
|
|
"signal/brier_reward/group_std_mean": 0.1514558345079422,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011890813149511814,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011890813149511814,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022554631531238555,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.028490035980939864,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022554632276296617,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022554632276296617,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14929589331150056,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19481739699840545,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002134931227192283,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002134931227192283,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14929589331150056,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19481739699840545,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002134931227192283,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002134931227192283,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14929589331150056,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19481739699840545,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002134931227192283,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002134931227192283,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1377663642168045,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1799382120370865,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019700590055435896,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019700590055435896,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09448865950107574,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12360241562128067,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001351187820546329,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001351187820546329,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.061427921056747437,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07933543026447296,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008784192497842014,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008784192497842014,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14929589331150056,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19481739699840545,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002134931227192283,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002134931227192283,
|
|
"step": 265
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2665301529748049,
|
|
"calibration/batch_distribution_entropy": 0.9337003352908582,
|
|
"calibration/buffer_distribution_entropy": 0.9544330118977861,
|
|
"calibration/confidence_entropy": 0.4758372413036889,
|
|
"calibration/coverage@0%": 0.009765625,
|
|
"calibration/coverage@1%": 0.009765625,
|
|
"calibration/coverage@10%": 0.1109375,
|
|
"calibration/coverage@15%": 0.23359375,
|
|
"calibration/coverage@20%": 0.326953125,
|
|
"calibration/coverage@25%": 0.430078125,
|
|
"calibration/coverage@30%": 0.547265625,
|
|
"calibration/coverage@5%": 0.053125,
|
|
"calibration/ece": 0.13886920481639256,
|
|
"calibration/mean_confidence": 0.5781809774169434,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 674.8,
|
|
"completions/max_terminated_length": 674.8,
|
|
"completions/mean_length": 196.86259765625,
|
|
"completions/mean_terminated_length": 196.88185119628906,
|
|
"completions/min_length": 80.8,
|
|
"completions/min_terminated_length": 100.8,
|
|
"epoch": 0.864,
|
|
"grad_norm": 0.001146928290836513,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 910243795.0,
|
|
"reward": 0.9781444787979126,
|
|
"reward_std": 0.06458824276924133,
|
|
"rewards/accuracy_reward": 0.59140625,
|
|
"rewards/brier_reward": 0.7921436786651611,
|
|
"rewards/confidence_uniqueness_reward": 0.9507281303405761,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_coverage_0": 0.09182494133710861,
|
|
"rewards/frontier_coverage_1": 0.09182494133710861,
|
|
"rewards/frontier_coverage_10": 0.09182494133710861,
|
|
"rewards/frontier_coverage_15": 0.0870552383363247,
|
|
"rewards/frontier_coverage_20": 0.06757164672017098,
|
|
"rewards/frontier_coverage_25": 0.051709264516830444,
|
|
"rewards/frontier_coverage_5": 0.09182494133710861,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0930908203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12001070380210876,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.675,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04654541015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04654541015625,
|
|
"signal/advantage_abs_mean": 0.04986320808529854,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04986320808529854,
|
|
"signal/advantage_pre_scale_std": 0.09519556760787964,
|
|
"signal/advantage_std": 0.09519556760787964,
|
|
"signal/brier_reward/centered_abs_mean": 0.11273131370544434,
|
|
"signal/brier_reward/group_std_mean": 0.14456582069396973,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011273131892085075,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011273131892085075,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021527956053614617,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.027080774307250977,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002152795670554042,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002152795670554042,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15594776272773742,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20174038112163545,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002230052975937724,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002230052975937724,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15594776272773742,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20174038112163545,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002230052975937724,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002230052975937724,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15594776272773742,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20174038112163545,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002230052975937724,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002230052975937724,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1416664719581604,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18316128849983215,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020258305361494423,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020258305361494423,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09624775648117065,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12451988160610199,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013763429131358861,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013763429131358861,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.059066733717918395,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0757571741938591,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008446542662568391,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008446542662568391,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15594776272773742,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20174038112163545,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002230052975937724,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002230052975937724,
|
|
"step": 270
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3452574331468948,
|
|
"calibration/batch_distribution_entropy": 0.9465358312516123,
|
|
"calibration/buffer_distribution_entropy": 0.9557025260532088,
|
|
"calibration/confidence_entropy": 0.44704613050715925,
|
|
"calibration/coverage@0%": 0.019534307729941292,
|
|
"calibration/coverage@1%": 0.019534307729941292,
|
|
"calibration/coverage@10%": 0.05275807240704501,
|
|
"calibration/coverage@15%": 0.07698523116438356,
|
|
"calibration/coverage@20%": 0.17000596257338552,
|
|
"calibration/coverage@25%": 0.32013973825831704,
|
|
"calibration/coverage@30%": 0.4193860078277886,
|
|
"calibration/coverage@5%": 0.029319043542074364,
|
|
"calibration/ece": 0.14351819665830193,
|
|
"calibration/mean_confidence": 0.48057255943470245,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 477.8,
|
|
"completions/max_terminated_length": 477.8,
|
|
"completions/mean_length": 195.49287109375,
|
|
"completions/mean_terminated_length": 195.512158203125,
|
|
"completions/min_length": 78.6,
|
|
"completions/min_terminated_length": 98.2,
|
|
"epoch": 0.88,
|
|
"grad_norm": 0.0009138612658716738,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0002,
|
|
"num_tokens": 927392714.0,
|
|
"reward": 0.9463012337684631,
|
|
"reward_std": 0.06559450551867485,
|
|
"rewards/accuracy_reward": 0.51484375,
|
|
"rewards/brier_reward": 0.8050420165061951,
|
|
"rewards/confidence_uniqueness_reward": 0.9523642301559448,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_coverage_0": 0.1560472682118416,
|
|
"rewards/frontier_coverage_1": 0.1560472682118416,
|
|
"rewards/frontier_coverage_10": 0.1560472682118416,
|
|
"rewards/frontier_coverage_15": 0.13759579956531526,
|
|
"rewards/frontier_coverage_20": 0.09614049047231674,
|
|
"rewards/frontier_coverage_25": 0.06428121700882912,
|
|
"rewards/frontier_coverage_5": 0.1560472682118416,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09307861328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1208904430270195,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.046539306640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.046539306640625,
|
|
"signal/advantage_abs_mean": 0.050657791644334794,
|
|
"signal/advantage_pre_scale_abs_mean": 0.050657791644334794,
|
|
"signal/advantage_pre_scale_std": 0.09581429213285446,
|
|
"signal/advantage_std": 0.09581429213285446,
|
|
"signal/brier_reward/centered_abs_mean": 0.10857920050621032,
|
|
"signal/brier_reward/group_std_mean": 0.13950212001800538,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010857920348644256,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010857920348644256,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01955595873296261,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02445173226296902,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0019555958919227124,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019555958919227124,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15932937264442443,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20401280820369722,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022784100845456125,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022784100845456125,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15932937264442443,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20401280820369722,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022784100845456125,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022784100845456125,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15932937264442443,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20401280820369722,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022784100845456125,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022784100845456125,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13971571624279022,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17929893136024475,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019979347474873067,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019979347474873067,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09661854058504105,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12456222176551819,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013816451421007514,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013816451421007514,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05862758159637451,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07523611336946487,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008383744047023356,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008383744047023356,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15932937264442443,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20401280820369722,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022784100845456125,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022784100845456125,
|
|
"step": 275
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3462076894170047,
|
|
"calibration/batch_distribution_entropy": 0.9425424465087536,
|
|
"calibration/buffer_distribution_entropy": 0.9567942420251466,
|
|
"calibration/confidence_entropy": 0.4741509430206251,
|
|
"calibration/coverage@0%": 0.027734375,
|
|
"calibration/coverage@1%": 0.027734375,
|
|
"calibration/coverage@10%": 0.08671875,
|
|
"calibration/coverage@15%": 0.146484375,
|
|
"calibration/coverage@20%": 0.20078125,
|
|
"calibration/coverage@25%": 0.3234375,
|
|
"calibration/coverage@30%": 0.47890625,
|
|
"calibration/coverage@5%": 0.03359375,
|
|
"calibration/ece": 0.14865247684944954,
|
|
"calibration/mean_confidence": 0.5141867801386891,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 458.6,
|
|
"completions/max_terminated_length": 458.6,
|
|
"completions/mean_length": 199.56806640625,
|
|
"completions/mean_terminated_length": 199.58777465820313,
|
|
"completions/min_length": 80.6,
|
|
"completions/min_terminated_length": 98.8,
|
|
"epoch": 0.896,
|
|
"grad_norm": 0.001016242429614067,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 944547139.0,
|
|
"reward": 0.9603770971298218,
|
|
"reward_std": 0.06262253299355507,
|
|
"rewards/accuracy_reward": 0.5482421875,
|
|
"rewards/brier_reward": 0.8026605606079101,
|
|
"rewards/confidence_uniqueness_reward": 0.9549548506736756,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_coverage_0": 0.12388900071382522,
|
|
"rewards/frontier_coverage_1": 0.12388900071382522,
|
|
"rewards/frontier_coverage_10": 0.12388900071382522,
|
|
"rewards/frontier_coverage_15": 0.10794235169887542,
|
|
"rewards/frontier_coverage_20": 0.07755922675132751,
|
|
"rewards/frontier_coverage_25": 0.056235866993665694,
|
|
"rewards/frontier_coverage_5": 0.12388900071382522,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0889404296875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12086254060268402,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.64375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04447021484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04447021484375,
|
|
"signal/advantage_abs_mean": 0.0463208869099617,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0463208869099617,
|
|
"signal/advantage_pre_scale_std": 0.09083856195211411,
|
|
"signal/advantage_std": 0.09083856195211411,
|
|
"signal/brier_reward/centered_abs_mean": 0.10326587110757827,
|
|
"signal/brier_reward/group_std_mean": 0.13239023983478546,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010326587595045567,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010326587595045567,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01764247938990593,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.022220425307750702,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0017642479855567218,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017642479855567218,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.150966015458107,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19346502125263215,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002158814016729593,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002158814016729593,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.150966015458107,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19346502125263215,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002158814016729593,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002158814016729593,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.150966015458107,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19346502125263215,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002158814016729593,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002158814016729593,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12611357122659683,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.161882221698761,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018034240463748574,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018034240463748574,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08738774359226227,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11280497461557389,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001249644672498107,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001249644672498107,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.053803355991840364,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06944843530654907,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007693879655562341,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007693879655562341,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.150966015458107,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19346502125263215,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002158814016729593,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002158814016729593,
|
|
"step": 280
|
|
},
|
|
{
|
|
"calibration/aurc": 0.35427870476059475,
|
|
"calibration/batch_distribution_entropy": 0.9575071938085074,
|
|
"calibration/buffer_distribution_entropy": 0.9583187097828064,
|
|
"calibration/confidence_entropy": 0.49101955323962176,
|
|
"calibration/coverage@0%": 0.020703125,
|
|
"calibration/coverage@1%": 0.020703125,
|
|
"calibration/coverage@10%": 0.096875,
|
|
"calibration/coverage@15%": 0.135546875,
|
|
"calibration/coverage@20%": 0.290625,
|
|
"calibration/coverage@25%": 0.37890625,
|
|
"calibration/coverage@30%": 0.47890625,
|
|
"calibration/coverage@5%": 0.053125,
|
|
"calibration/ece": 0.14715639573538566,
|
|
"calibration/mean_confidence": 0.4909093624526076,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 503.4,
|
|
"completions/max_terminated_length": 503.4,
|
|
"completions/mean_length": 204.24189453125,
|
|
"completions/mean_terminated_length": 204.30109558105468,
|
|
"completions/min_length": 43.8,
|
|
"completions/min_terminated_length": 105.0,
|
|
"epoch": 0.912,
|
|
"grad_norm": 0.0006890616496093571,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 961689872.0,
|
|
"reward": 0.960372805595398,
|
|
"reward_std": 0.06250972747802734,
|
|
"rewards/accuracy_reward": 0.5474609375,
|
|
"rewards/brier_reward": 0.8091128706932068,
|
|
"rewards/confidence_uniqueness_reward": 0.9568856477737426,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_coverage_0": 0.1184326808899641,
|
|
"rewards/frontier_coverage_1": 0.1184326808899641,
|
|
"rewards/frontier_coverage_10": 0.1184326808899641,
|
|
"rewards/frontier_coverage_15": 0.10469010137021542,
|
|
"rewards/frontier_coverage_20": 0.07688896842300892,
|
|
"rewards/frontier_coverage_25": 0.05720534510910511,
|
|
"rewards/frontier_coverage_5": 0.1184326808899641,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08143310546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.11135471612215042,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.040716552734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.040716552734375,
|
|
"signal/advantage_abs_mean": 0.04637853130698204,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04637853130698204,
|
|
"signal/advantage_pre_scale_std": 0.08917968124151229,
|
|
"signal/advantage_std": 0.08917968124151229,
|
|
"signal/brier_reward/centered_abs_mean": 0.10574377328157425,
|
|
"signal/brier_reward/group_std_mean": 0.13551586270332336,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01057437751442194,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01057437751442194,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01588448788970709,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02095335051417351,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0015884488122537733,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015884488122537733,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15329268872737883,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1948721706867218,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021920854225754736,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021920854225754736,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15329268872737883,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1948721706867218,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021920854225754736,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021920854225754736,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15329268872737883,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1948721706867218,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021920854225754736,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021920854225754736,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12959499955177306,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16456757485866547,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00185320854652673,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00185320854652673,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08761304467916489,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11146515905857086,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012528665363788604,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012528665363788604,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05668332800269127,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07238757461309434,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008105716085992753,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008105716085992753,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15329268872737883,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1948721706867218,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021920854225754736,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021920854225754736,
|
|
"step": 285
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3856348088878695,
|
|
"calibration/batch_distribution_entropy": 0.9619417296860739,
|
|
"calibration/buffer_distribution_entropy": 0.9620296197016186,
|
|
"calibration/confidence_entropy": 0.5035797239780982,
|
|
"calibration/coverage@0%": 0.009386466487279843,
|
|
"calibration/coverage@1%": 0.009386466487279843,
|
|
"calibration/coverage@10%": 0.028926125244618395,
|
|
"calibration/coverage@15%": 0.044947101272015656,
|
|
"calibration/coverage@20%": 0.059801553326810176,
|
|
"calibration/coverage@25%": 0.19144982265166338,
|
|
"calibration/coverage@30%": 0.3309556934931507,
|
|
"calibration/coverage@5%": 0.01173480308219178,
|
|
"calibration/ece": 0.12592002169075053,
|
|
"calibration/mean_confidence": 0.5046425573200831,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 416.6,
|
|
"completions/max_terminated_length": 416.6,
|
|
"completions/mean_length": 204.05205078125,
|
|
"completions/mean_terminated_length": 204.11180114746094,
|
|
"completions/min_length": 40.4,
|
|
"completions/min_terminated_length": 101.0,
|
|
"epoch": 0.928,
|
|
"grad_norm": 0.0007080600480549037,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 978806181.0,
|
|
"reward": 0.949990451335907,
|
|
"reward_std": 0.06062187701463699,
|
|
"rewards/accuracy_reward": 0.528125,
|
|
"rewards/brier_reward": 0.7971256971359253,
|
|
"rewards/confidence_uniqueness_reward": 0.9547683119773864,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_coverage_0": 0.12781327366828918,
|
|
"rewards/frontier_coverage_1": 0.12781327366828918,
|
|
"rewards/frontier_coverage_10": 0.12781327366828918,
|
|
"rewards/frontier_coverage_15": 0.11005319505929947,
|
|
"rewards/frontier_coverage_20": 0.07983717322349548,
|
|
"rewards/frontier_coverage_25": 0.060046466439962386,
|
|
"rewards/frontier_coverage_5": 0.12781327366828918,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.076708984375,
|
|
"signal/accuracy_reward/group_std_mean": 0.10556017011404037,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0383544921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0383544921875,
|
|
"signal/advantage_abs_mean": 0.04432180598378181,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04432180598378181,
|
|
"signal/advantage_pre_scale_std": 0.08894449770450592,
|
|
"signal/advantage_std": 0.08894449770450592,
|
|
"signal/brier_reward/centered_abs_mean": 0.10374047160148621,
|
|
"signal/brier_reward/group_std_mean": 0.13460810035467147,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010374047234654427,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010374047234654427,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.017230465635657312,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.022360032051801683,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001723046530969441,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001723046530969441,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.141362664103508,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1842469871044159,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020214861258864405,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020214861258864405,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.141362664103508,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1842469871044159,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020214861258864405,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020214861258864405,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.141362664103508,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1842469871044159,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020214861258864405,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020214861258864405,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11770967096090316,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15376219451427459,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016832482069730759,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016832482069730759,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07940128147602081,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10401753634214402,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011354383546859025,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011354383546859025,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.053226197510957717,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06920340955257416,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007611346081830561,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007611346081830561,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.141362664103508,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1842469871044159,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020214861258864405,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020214861258864405,
|
|
"step": 290
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25053891809513856,
|
|
"calibration/batch_distribution_entropy": 0.9541330489936669,
|
|
"calibration/buffer_distribution_entropy": 0.9650525357181208,
|
|
"calibration/confidence_entropy": 0.4900174379706102,
|
|
"calibration/coverage@0%": 0.0328125,
|
|
"calibration/coverage@1%": 0.0328125,
|
|
"calibration/coverage@10%": 0.205859375,
|
|
"calibration/coverage@15%": 0.31953125,
|
|
"calibration/coverage@20%": 0.42734375,
|
|
"calibration/coverage@25%": 0.519921875,
|
|
"calibration/coverage@30%": 0.601171875,
|
|
"calibration/coverage@5%": 0.11640625,
|
|
"calibration/ece": 0.08060715323311354,
|
|
"calibration/mean_confidence": 0.4891742077465079,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 451.0,
|
|
"completions/max_terminated_length": 451.0,
|
|
"completions/mean_length": 207.48515625,
|
|
"completions/mean_terminated_length": 207.48515625,
|
|
"completions/min_length": 105.6,
|
|
"completions/min_terminated_length": 105.6,
|
|
"epoch": 0.944,
|
|
"grad_norm": 0.0009179720655083656,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 995906253.0,
|
|
"reward": 0.9544263243675232,
|
|
"reward_std": 0.0705165296792984,
|
|
"rewards/accuracy_reward": 0.5337890625,
|
|
"rewards/brier_reward": 0.8030768990516662,
|
|
"rewards/confidence_uniqueness_reward": 0.9553874850273132,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_coverage_0": 0.1375230610370636,
|
|
"rewards/frontier_coverage_1": 0.1375230610370636,
|
|
"rewards/frontier_coverage_10": 0.13709916770458222,
|
|
"rewards/frontier_coverage_15": 0.12249395102262498,
|
|
"rewards/frontier_coverage_20": 0.08775933682918549,
|
|
"rewards/frontier_coverage_25": 0.06064917892217636,
|
|
"rewards/frontier_coverage_5": 0.1375230610370636,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10455322265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.13749758303165435,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052276611328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.052276611328125,
|
|
"signal/advantage_abs_mean": 0.05363398566842079,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05363398566842079,
|
|
"signal/advantage_pre_scale_std": 0.10147667974233628,
|
|
"signal/advantage_std": 0.10147667974233628,
|
|
"signal/brier_reward/centered_abs_mean": 0.1024449646472931,
|
|
"signal/brier_reward/group_std_mean": 0.13244157880544663,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01024449672549963,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01024449672549963,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.016994761675596236,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.021610427275300027,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0016994762001559139,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016994762001559139,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15415203273296357,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1998533695936203,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022043741773813963,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022043741773813963,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15415203273296357,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1998533695936203,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022043741773813963,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022043741773813963,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1532199949026108,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19871186316013337,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002191046020016074,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002191046020016074,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13066325932741166,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16969827115535735,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018684846349060536,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018684846349060536,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08451437950134277,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11034028381109237,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012085556285455824,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012085556285455824,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05478915497660637,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07099926471710205,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007834849297069013,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007834849297069013,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15415203273296357,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1998533695936203,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022043741773813963,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022043741773813963,
|
|
"step": 295
|
|
},
|
|
{
|
|
"calibration/aurc": 0.32084141990480985,
|
|
"calibration/batch_distribution_entropy": 0.9631872285829151,
|
|
"calibration/buffer_distribution_entropy": 0.9660335700291272,
|
|
"calibration/confidence_entropy": 0.4529352593268487,
|
|
"calibration/coverage@0%": 0.007421875,
|
|
"calibration/coverage@1%": 0.007421875,
|
|
"calibration/coverage@10%": 0.129296875,
|
|
"calibration/coverage@15%": 0.28828125,
|
|
"calibration/coverage@20%": 0.334375,
|
|
"calibration/coverage@25%": 0.373046875,
|
|
"calibration/coverage@30%": 0.42578125,
|
|
"calibration/coverage@5%": 0.0359375,
|
|
"calibration/ece": 0.1582392307355111,
|
|
"calibration/mean_confidence": 0.5463262394170487,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 470.2,
|
|
"completions/max_terminated_length": 470.2,
|
|
"completions/mean_length": 211.44248046875,
|
|
"completions/mean_terminated_length": 211.4843017578125,
|
|
"completions/min_length": 62.0,
|
|
"completions/min_terminated_length": 101.8,
|
|
"epoch": 0.96,
|
|
"grad_norm": 0.0007353639230132103,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0003,
|
|
"num_tokens": 1013011744.0,
|
|
"reward": 0.9526812791824341,
|
|
"reward_std": 0.05707969143986702,
|
|
"rewards/accuracy_reward": 0.52734375,
|
|
"rewards/brier_reward": 0.8101108074188232,
|
|
"rewards/confidence_uniqueness_reward": 0.956527829170227,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_coverage_0": 0.14714392870664597,
|
|
"rewards/frontier_coverage_1": 0.14714392870664597,
|
|
"rewards/frontier_coverage_10": 0.1471204236149788,
|
|
"rewards/frontier_coverage_15": 0.1279465898871422,
|
|
"rewards/frontier_coverage_20": 0.0889292061328888,
|
|
"rewards/frontier_coverage_25": 0.06472631767392159,
|
|
"rewards/frontier_coverage_5": 0.14714392870664597,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.076416015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1025225818157196,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.703125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0382080078125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0382080078125,
|
|
"signal/advantage_abs_mean": 0.04289043098688126,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04289043098688126,
|
|
"signal/advantage_pre_scale_std": 0.08751718997955323,
|
|
"signal/advantage_std": 0.08751718997955323,
|
|
"signal/brier_reward/centered_abs_mean": 0.09678200632333755,
|
|
"signal/brier_reward/group_std_mean": 0.12593707144260408,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.009678200632333756,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.009678200632333756,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.017707385867834092,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.022501471638679504,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001770738698542118,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001770738698542118,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13480642139911653,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17521958649158478,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019277318846434356,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019277318846434356,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13480642139911653,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17521958649158478,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019277318846434356,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019277318846434356,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1338825672864914,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17401980459690095,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019145207479596138,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019145207479596138,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11472393870353699,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.14965740144252776,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016405523056164384,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016405523056164384,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07549109011888504,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09850892573595046,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010795225854963065,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010795225854963065,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05492957755923271,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07022278383374214,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007854929543100297,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007854929543100297,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13480642139911653,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17521958649158478,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019277318846434356,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019277318846434356,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"eval_calibration/aurc": 0.43207962981231096,
|
|
"eval_calibration/batch_distribution_entropy": 0.9353238475943706,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9659055969626256,
|
|
"eval_calibration/confidence_entropy": 0.4578375000571542,
|
|
"eval_calibration/coverage@0%": 0.09375,
|
|
"eval_calibration/coverage@1%": 0.09375,
|
|
"eval_calibration/coverage@10%": 0.109375,
|
|
"eval_calibration/coverage@15%": 0.125,
|
|
"eval_calibration/coverage@20%": 0.1328125,
|
|
"eval_calibration/coverage@25%": 0.171875,
|
|
"eval_calibration/coverage@30%": 0.1796875,
|
|
"eval_calibration/coverage@5%": 0.09375,
|
|
"eval_calibration/ece": 0.19972512714813934,
|
|
"eval_calibration/mean_confidence": 0.4987985646481393,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 403.5,
|
|
"eval_completions/max_terminated_length": 403.5,
|
|
"eval_completions/mean_length": 216.9659881591797,
|
|
"eval_completions/mean_terminated_length": 216.9659881591797,
|
|
"eval_completions/min_length": 116.25,
|
|
"eval_completions/min_terminated_length": 116.25,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 1013011744.0,
|
|
"eval_reward": 0.9047603160142899,
|
|
"eval_reward_std": 0.2327863685786724,
|
|
"eval_rewards/accuracy_reward": 0.435546875,
|
|
"eval_rewards/brier_reward": 0.8014965802431107,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.90576171875,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_coverage_0": 0.1972401924431324,
|
|
"eval_rewards/frontier_coverage_1": 0.1972401924431324,
|
|
"eval_rewards/frontier_coverage_10": 0.1972401924431324,
|
|
"eval_rewards/frontier_coverage_15": 0.1668083593249321,
|
|
"eval_rewards/frontier_coverage_20": 0.11011006683111191,
|
|
"eval_rewards/frontier_coverage_25": 0.07125817239284515,
|
|
"eval_rewards/frontier_coverage_5": 0.1972401924431324,
|
|
"eval_runtime": 20.6578,
|
|
"eval_samples_per_second": 24.204,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4749755859375,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49479666352272034,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23748779296875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23748779296875,
|
|
"eval_signal/advantage_abs_mean": 0.21877508983016014,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21877508983016014,
|
|
"eval_signal/advantage_pre_scale_std": 0.23022845014929771,
|
|
"eval_signal/advantage_std": 0.23022845014929771,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.1955815851688385,
|
|
"eval_signal/brier_reward/group_std_mean": 0.24249068275094032,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019558158703148365,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019558158703148365,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.038177490234375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.045081330463290215,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003817749093286693,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003817749093286693,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.3407173827290535,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.42629872262477875,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00487225828692317,
|
|
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00487225828692317,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3407173827290535,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.42629872262477875,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00487225828692317,
|
|
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00487225828692317,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3407173827290535,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.42629872262477875,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00487225828692317,
|
|
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00487225828692317,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.28800592571496964,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.3627534434199333,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004118484794162214,
|
|
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004118484794162214,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.17610583826899529,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.2276080958545208,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002518313529435545,
|
|
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002518313529435545,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.10346624068915844,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.13241487741470337,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014795672905165702,
|
|
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014795672905165702,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3407173827290535,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.42629872262477875,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00487225828692317,
|
|
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00487225828692317,
|
|
"eval_steps_per_second": 0.194,
|
|
"step": 300
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25253727756623107,
|
|
"calibration/batch_distribution_entropy": 0.9449117322882312,
|
|
"calibration/buffer_distribution_entropy": 0.9660246365706477,
|
|
"calibration/confidence_entropy": 0.4673776739365726,
|
|
"calibration/coverage@0%": 0.036328125,
|
|
"calibration/coverage@1%": 0.036328125,
|
|
"calibration/coverage@10%": 0.28046875,
|
|
"calibration/coverage@15%": 0.38125,
|
|
"calibration/coverage@20%": 0.495703125,
|
|
"calibration/coverage@25%": 0.562109375,
|
|
"calibration/coverage@30%": 0.616015625,
|
|
"calibration/coverage@5%": 0.0609375,
|
|
"calibration/ece": 0.11527308122425592,
|
|
"calibration/mean_confidence": 0.541623520405874,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 483.8,
|
|
"completions/max_terminated_length": 483.8,
|
|
"completions/mean_length": 214.971875,
|
|
"completions/mean_terminated_length": 214.99241638183594,
|
|
"completions/min_length": 87.4,
|
|
"completions/min_terminated_length": 108.6,
|
|
"epoch": 0.976,
|
|
"grad_norm": 0.0008463452104479074,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 1030074176.0,
|
|
"reward": 0.9660236597061157,
|
|
"reward_std": 0.06459108740091324,
|
|
"rewards/accuracy_reward": 0.55732421875,
|
|
"rewards/brier_reward": 0.8069370865821839,
|
|
"rewards/confidence_uniqueness_reward": 0.9547674655914307,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_coverage_0": 0.12958877347409725,
|
|
"rewards/frontier_coverage_1": 0.12958877347409725,
|
|
"rewards/frontier_coverage_10": 0.12983475551009177,
|
|
"rewards/frontier_coverage_15": 0.11786438822746277,
|
|
"rewards/frontier_coverage_20": 0.08397987484931946,
|
|
"rewards/frontier_coverage_25": 0.06556350365281105,
|
|
"rewards/frontier_coverage_5": 0.12958877347409725,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.088421630859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11987629979848861,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0442108154296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0442108154296875,
|
|
"signal/advantage_abs_mean": 0.048282912001013756,
|
|
"signal/advantage_pre_scale_abs_mean": 0.048282912001013756,
|
|
"signal/advantage_pre_scale_std": 0.09471045136451721,
|
|
"signal/advantage_std": 0.09471045136451721,
|
|
"signal/brier_reward/centered_abs_mean": 0.10018168091773987,
|
|
"signal/brier_reward/group_std_mean": 0.1312152311205864,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010018168576061725,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010018168576061725,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019286222010850906,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.024579422920942305,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0019286222057417035,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019286222057417035,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.140859717130661,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18376873433589935,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020142939407378434,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020142939407378434,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.140859717130661,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18376873433589935,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020142939407378434,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020142939407378434,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14001604318618774,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1826833665370941,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00200222940184176,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00200222940184176,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1187993735074997,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15566462874412537,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016988310497254133,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016988310497254133,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07476956397294998,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0980818435549736,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010692047653719783,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010692047653719783,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05499729737639427,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07091807499527931,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007864613551646471,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007864613551646471,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.140859717130661,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18376873433589935,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020142939407378434,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020142939407378434,
|
|
"step": 305
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34024954900528936,
|
|
"calibration/batch_distribution_entropy": 0.9299159526632534,
|
|
"calibration/buffer_distribution_entropy": 0.9659855427044987,
|
|
"calibration/confidence_entropy": 0.4099067239170922,
|
|
"calibration/coverage@0%": 0.02421875,
|
|
"calibration/coverage@1%": 0.02421875,
|
|
"calibration/coverage@10%": 0.08984375,
|
|
"calibration/coverage@15%": 0.116015625,
|
|
"calibration/coverage@20%": 0.159765625,
|
|
"calibration/coverage@25%": 0.3703125,
|
|
"calibration/coverage@30%": 0.4921875,
|
|
"calibration/coverage@5%": 0.046484375,
|
|
"calibration/ece": 0.14085424638925065,
|
|
"calibration/mean_confidence": 0.48945767108986615,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 580.6,
|
|
"completions/max_terminated_length": 580.6,
|
|
"completions/mean_length": 209.4171875,
|
|
"completions/mean_terminated_length": 209.4171875,
|
|
"completions/min_length": 102.4,
|
|
"completions/min_terminated_length": 102.4,
|
|
"epoch": 0.992,
|
|
"grad_norm": 0.0008461447432637215,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 1047347088.0,
|
|
"reward": 0.952314579486847,
|
|
"reward_std": 0.057999057322740556,
|
|
"rewards/accuracy_reward": 0.5294921875,
|
|
"rewards/brier_reward": 0.8023276925086975,
|
|
"rewards/confidence_uniqueness_reward": 0.942218017578125,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_coverage_0": 0.15457661747932433,
|
|
"rewards/frontier_coverage_1": 0.15457661747932433,
|
|
"rewards/frontier_coverage_10": 0.15457661747932433,
|
|
"rewards/frontier_coverage_15": 0.13298805058002472,
|
|
"rewards/frontier_coverage_20": 0.09164022654294968,
|
|
"rewards/frontier_coverage_25": 0.07411976456642151,
|
|
"rewards/frontier_coverage_5": 0.15457661747932433,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07926025390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10321827828884125,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.7125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039630126953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.039630126953125,
|
|
"signal/advantage_abs_mean": 0.04445498287677765,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04445498287677765,
|
|
"signal/advantage_pre_scale_std": 0.08984951674938202,
|
|
"signal/advantage_std": 0.08984951674938202,
|
|
"signal/brier_reward/centered_abs_mean": 0.10390263050794601,
|
|
"signal/brier_reward/group_std_mean": 0.13310863077640533,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010390263237059116,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010390263237059116,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02657153606414795,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03434660360217094,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002657153643667698,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002657153643667698,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13931848406791686,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1796002447605133,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00199225430842489,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00199225430842489,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13931848406791686,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1796002447605133,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00199225430842489,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00199225430842489,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13931848406791686,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1796002447605133,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00199225430842489,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00199225430842489,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11711540371179581,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1513279214501381,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016747502610087394,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016747502610087394,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07272039651870728,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0945111259818077,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010399016202427447,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010399016202427447,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.055749702453613284,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0714589461684227,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007972207386046648,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007972207386046648,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13931848406791686,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1796002447605133,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00199225430842489,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00199225430842489,
|
|
"step": 310
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27388473601115615,
|
|
"calibration/batch_distribution_entropy": 0.8467881507069492,
|
|
"calibration/buffer_distribution_entropy": 0.9655108154257023,
|
|
"calibration/confidence_entropy": 0.38870360482432476,
|
|
"calibration/coverage@0%": 0.0400390625,
|
|
"calibration/coverage@1%": 0.0400390625,
|
|
"calibration/coverage@10%": 0.072265625,
|
|
"calibration/coverage@15%": 0.1328125,
|
|
"calibration/coverage@20%": 0.2412109375,
|
|
"calibration/coverage@25%": 0.4775390625,
|
|
"calibration/coverage@30%": 0.6171875,
|
|
"calibration/coverage@5%": 0.05078125,
|
|
"calibration/ece": 0.17844140625000002,
|
|
"calibration/mean_confidence": 0.6485000000000001,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 401.5,
|
|
"completions/max_terminated_length": 401.5,
|
|
"completions/mean_length": 205.3455810546875,
|
|
"completions/mean_terminated_length": 205.3455810546875,
|
|
"completions/min_length": 101.0,
|
|
"completions/min_terminated_length": 101.0,
|
|
"epoch": 0.9984,
|
|
"num_tokens": 1054193615.0,
|
|
"reward": 0.9565387666225433,
|
|
"reward_std": 0.06219491548836231,
|
|
"rewards/accuracy_reward": 0.55615234375,
|
|
"rewards/brier_reward": 0.7606430053710938,
|
|
"rewards/confidence_uniqueness_reward": 0.9483394622802734,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_coverage_0": 0.0879761092364788,
|
|
"rewards/frontier_coverage_1": 0.0879761092364788,
|
|
"rewards/frontier_coverage_10": 0.08799909055233002,
|
|
"rewards/frontier_coverage_15": 0.07236327230930328,
|
|
"rewards/frontier_coverage_20": 0.054976701736450195,
|
|
"rewards/frontier_coverage_25": 0.049710165709257126,
|
|
"rewards/frontier_coverage_5": 0.0879761092364788,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.080535888671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.10539381578564644,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.703125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0402679443359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0402679443359375,
|
|
"signal/advantage_abs_mean": 0.04706815257668495,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04706815257668495,
|
|
"signal/advantage_pre_scale_std": 0.09463966637849808,
|
|
"signal/advantage_std": 0.09463966637849808,
|
|
"signal/brier_reward/centered_abs_mean": 0.11788154020905495,
|
|
"signal/brier_reward/group_std_mean": 0.14912863820791245,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011788154020905495,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011788154020905495,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02400451898574829,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.029955977573990822,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024004519800655544,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024004519800655544,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13757885247468948,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17696572095155716,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001967377553228289,
|
|
"signal/frontier_coverage_0/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001967377553228289,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13757885247468948,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17696572095155716,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001967377553228289,
|
|
"signal/frontier_coverage_1/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001967377553228289,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13729550689458847,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1765838861465454,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019633257179521024,
|
|
"signal/frontier_coverage_10/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019633257179521024,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11528988182544708,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.14871473610401154,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016486452659592032,
|
|
"signal/frontier_coverage_15/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016486452659592032,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07046542316675186,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09160730615258217,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010076555190607905,
|
|
"signal/frontier_coverage_20/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010076555190607905,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05335330776870251,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06834409385919571,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007629523170180619,
|
|
"signal/frontier_coverage_25/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007629523170180619,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13757885247468948,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17696572095155716,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001967377553228289,
|
|
"signal/frontier_coverage_5/weight": 0.014299999922513962,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001967377553228289,
|
|
"step": 312,
|
|
"total_flos": 0.0,
|
|
"train_loss": 7.492675094675715e-05,
|
|
"train_runtime": 59844.158,
|
|
"train_samples_per_second": 0.334,
|
|
"train_steps_per_second": 0.005
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 312,
|
|
"num_input_tokens_seen": 1054193615,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|