9940 lines
609 KiB
JSON
9940 lines
609 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9984,
|
|
"eval_steps": 50,
|
|
"global_step": 312,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.6224643780433982,
|
|
"calibration/batch_distribution_entropy": 0.6544897379113672,
|
|
"calibration/batch_entropy_100bins": 0.4863029185249278,
|
|
"calibration/batch_entropy_10bins": 0.6544897379113672,
|
|
"calibration/batch_entropy_50bins": 0.5701004408606952,
|
|
"calibration/batch_uniqueness": 0.728444991952043,
|
|
"calibration/confidence_entropy": 0.34767197334474165,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4841265820092806,
|
|
"calibration/mean_confidence": 0.7931767989389904,
|
|
"calibration/prompt_uniqueness": 0.6103076405494752,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0353515625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1506.0,
|
|
"completions/mean_length": 271.31015625,
|
|
"completions/mean_terminated_length": 224.96051940917968,
|
|
"completions/min_length": 1.8,
|
|
"completions/min_terminated_length": 1.8,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.053285811096429825,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"loss": 0.0705,
|
|
"num_tokens": 17622248.0,
|
|
"reward": 0.49289684891700747,
|
|
"reward_std": 0.3958591163158417,
|
|
"rewards/accuracy_reward": 0.2240234375,
|
|
"rewards/brier_reward": 0.37564998865127563,
|
|
"rewards/format_reward": 0.67802734375,
|
|
"rewards/frontier_aurc_reward": 0.3027165472507477,
|
|
"rewards/frontier_coverage_1": 0.3027165472507477,
|
|
"rewards/frontier_coverage_10": 0.3027165472507477,
|
|
"rewards/frontier_coverage_15": 0.3027165472507477,
|
|
"rewards/frontier_coverage_20": 0.3027165472507477,
|
|
"rewards/frontier_coverage_25": 0.3027165472507477,
|
|
"rewards/frontier_coverage_5": 0.3027165472507477,
|
|
"rewards/frontier_ece_reward": 0.3027165472507477,
|
|
"rewards/frontier_entropy_batch_reward": -0.6468378663063049,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.24012451171875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.210546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.2819916486740112,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.315625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.120062255859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.120062255859375,
|
|
"signal/advantage_abs_mean": 0.33822785019874574,
|
|
"signal/advantage_pre_scale_abs_mean": 0.33822785019874574,
|
|
"signal/advantage_pre_scale_std": 0.40998163223266604,
|
|
"signal/advantage_std": 0.40998163223266604,
|
|
"signal/brier_reward/centered_abs_mean": 0.320052570104599,
|
|
"signal/brier_reward/group_bin_occupancy": 0.745703125,
|
|
"signal/brier_reward/group_std_mean": 0.36434565782546996,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04000657126307487,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.04000657126307487,
|
|
"signal/format_reward/centered_abs_mean": 0.408428955078125,
|
|
"signal/format_reward/group_bin_occupancy": 0.25,
|
|
"signal/format_reward/group_std_mean": 0.45669829845428467,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2042144775390625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.2042144775390625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.29173809885978697,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.662890625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.34154740571975706,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005222111754119396,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005222111754119396,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.29173809885978697,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.662890625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.34154740571975706,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005222111754119396,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005222111754119396,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.29173809885978697,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.662890625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.34154740571975706,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005222111754119396,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005222111754119396,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.29173809885978697,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.662890625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.34154740571975706,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005222111754119396,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005222111754119396,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.29173809885978697,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.662890625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.34154740571975706,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005222111754119396,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005222111754119396,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.29173809885978697,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.662890625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.34154740571975706,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005222111754119396,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005222111754119396,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.29173809885978697,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.662890625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.34154740571975706,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005222111754119396,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005222111754119396,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.29173809885978697,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.662890625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.34154740571975706,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03646726235747337,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03646726235747337,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4318214237689972,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.308203125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4762145817279816,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.05397767797112465,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.05397767797112465,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6528287664498039,
|
|
"calibration/batch_distribution_entropy": 0.6268190631519651,
|
|
"calibration/batch_entropy_100bins": 0.4738420714903396,
|
|
"calibration/batch_entropy_10bins": 0.6268190631519651,
|
|
"calibration/batch_entropy_50bins": 0.554670162377654,
|
|
"calibration/batch_uniqueness": 0.705548191421581,
|
|
"calibration/confidence_entropy": 0.33243235077831834,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.5101970205206909,
|
|
"calibration/mean_confidence": 0.7973768305353158,
|
|
"calibration/prompt_uniqueness": 0.5841085893747472,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.037109375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1523.4,
|
|
"completions/mean_length": 266.923828125,
|
|
"completions/mean_terminated_length": 218.05512084960938,
|
|
"completions/min_length": 2.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.021607212722301483,
|
|
"learning_rate": 6.249999999999999e-07,
|
|
"loss": 0.0713,
|
|
"num_tokens": 35455900.0,
|
|
"reward": 0.5007035851478576,
|
|
"reward_std": 0.378324830532074,
|
|
"rewards/accuracy_reward": 0.2111328125,
|
|
"rewards/brier_reward": 0.37633253931999205,
|
|
"rewards/format_reward": 0.71708984375,
|
|
"rewards/frontier_aurc_reward": 0.2987588942050934,
|
|
"rewards/frontier_coverage_1": 0.2987588942050934,
|
|
"rewards/frontier_coverage_10": 0.2987588942050934,
|
|
"rewards/frontier_coverage_15": 0.2987588942050934,
|
|
"rewards/frontier_coverage_20": 0.2987588942050934,
|
|
"rewards/frontier_coverage_25": 0.2987588942050934,
|
|
"rewards/frontier_coverage_5": 0.2987588942050934,
|
|
"rewards/frontier_ece_reward": 0.2987588942050934,
|
|
"rewards/frontier_entropy_batch_reward": -0.6818291902542114,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.22430419921875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.209765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.2700383305549622,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.321875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.112152099609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.112152099609375,
|
|
"signal/advantage_abs_mean": 0.3131078124046326,
|
|
"signal/advantage_pre_scale_abs_mean": 0.3131078124046326,
|
|
"signal/advantage_pre_scale_std": 0.3932444155216217,
|
|
"signal/advantage_std": 0.3932444155216217,
|
|
"signal/brier_reward/centered_abs_mean": 0.3091658055782318,
|
|
"signal/brier_reward/group_bin_occupancy": 0.756640625,
|
|
"signal/brier_reward/group_std_mean": 0.3574398994445801,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.038645725697278976,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.038645725697278976,
|
|
"signal/format_reward/centered_abs_mean": 0.377227783203125,
|
|
"signal/format_reward/group_bin_occupancy": 0.25,
|
|
"signal/format_reward/group_std_mean": 0.43797464966773986,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1886138916015625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1886138916015625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.2825876474380493,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.67109375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.33760352730751036,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005058318562805653,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005058318562805653,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2825876474380493,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.67109375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.33760352730751036,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005058318562805653,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005058318562805653,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2825876474380493,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.67109375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.33760352730751036,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005058318562805653,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005058318562805653,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2825876474380493,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.67109375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.33760352730751036,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005058318562805653,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005058318562805653,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2825876474380493,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.67109375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.33760352730751036,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005058318562805653,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005058318562805653,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2825876474380493,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.67109375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.33760352730751036,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005058318562805653,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005058318562805653,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2825876474380493,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.67109375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.33760352730751036,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005058318562805653,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005058318562805653,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.2825876474380493,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.67109375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.33760352730751036,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.035323455929756165,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.035323455929756165,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4059325873851776,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.315625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4605302751064301,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0507415734231472,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0507415734231472,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.616971247492587,
|
|
"calibration/batch_distribution_entropy": 0.6409075565843491,
|
|
"calibration/batch_entropy_100bins": 0.4772482403805065,
|
|
"calibration/batch_entropy_10bins": 0.6409075565843491,
|
|
"calibration/batch_entropy_50bins": 0.5566394338845917,
|
|
"calibration/batch_uniqueness": 0.7049275200846712,
|
|
"calibration/buffer_distribution_entropy": 0.6588770403392903,
|
|
"calibration/buffer_entropy_100bins": 0.49200675404876176,
|
|
"calibration/buffer_entropy_10bins": 0.6588770403392903,
|
|
"calibration/buffer_entropy_50bins": 0.5748770253713922,
|
|
"calibration/confidence_entropy": 0.34808615842826207,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4785652202870275,
|
|
"calibration/mean_confidence": 0.8042481872309974,
|
|
"calibration/prompt_uniqueness": 0.6151282665489992,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1420.2,
|
|
"completions/mean_length": 204.5537109375,
|
|
"completions/mean_terminated_length": 183.505908203125,
|
|
"completions/min_length": 3.2,
|
|
"completions/min_terminated_length": 3.2,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.011996953748166561,
|
|
"learning_rate": 9.374999999999999e-07,
|
|
"loss": 0.0439,
|
|
"num_tokens": 52599266.0,
|
|
"reward": 0.608043098449707,
|
|
"reward_std": 0.30430689454078674,
|
|
"rewards/accuracy_reward": 0.274609375,
|
|
"rewards/brier_reward": 0.4852728068828583,
|
|
"rewards/format_reward": 0.8759765625,
|
|
"rewards/frontier_aurc_reward": 0.30132074588909746,
|
|
"rewards/frontier_coverage_1": 0.3177640035748482,
|
|
"rewards/frontier_coverage_10": 0.3177640035748482,
|
|
"rewards/frontier_coverage_15": 0.3177640035748482,
|
|
"rewards/frontier_coverage_20": 0.3177640035748482,
|
|
"rewards/frontier_coverage_25": 0.3177640035748482,
|
|
"rewards/frontier_coverage_5": 0.3177640035748482,
|
|
"rewards/frontier_ece_reward": 0.2919433981180191,
|
|
"rewards/frontier_entropy_batch_reward": -0.8313869953155517,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2015869140625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.2046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.2485917925834656,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10079345703125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10079345703125,
|
|
"signal/advantage_abs_mean": 0.2351256161928177,
|
|
"signal/advantage_pre_scale_abs_mean": 0.2351256161928177,
|
|
"signal/advantage_pre_scale_std": 0.31978016495704653,
|
|
"signal/advantage_std": 0.31978016495704653,
|
|
"signal/brier_reward/centered_abs_mean": 0.2747634917497635,
|
|
"signal/brier_reward/group_bin_occupancy": 0.794921875,
|
|
"signal/brier_reward/group_std_mean": 0.330036336183548,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03434543646872044,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03434543646872044,
|
|
"signal/format_reward/centered_abs_mean": 0.20308837890625,
|
|
"signal/format_reward/group_bin_occupancy": 0.24375,
|
|
"signal/format_reward/group_std_mean": 0.3047700166702271,
|
|
"signal/format_reward/group_zero_std_frac": 0.05,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.101544189453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.101544189453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.21934852562844753,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.735546875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.2648093054071069,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003926338179735467,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003926338179735467,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.23966625183820725,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.708984375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.29589507579803465,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004290025448426604,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004290025448426604,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23966625183820725,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.708984375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.29589507579803465,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004290025448426604,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004290025448426604,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.23966625183820725,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.708984375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29589507579803465,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004290025448426604,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004290025448426604,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.23966625183820725,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.708984375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.29589507579803465,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004290025448426604,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004290025448426604,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.23966625183820725,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.708984375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.29589507579803465,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004290025448426604,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004290025448426604,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.23966625183820725,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.708984375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.29589507579803465,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004290025448426604,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004290025448426604,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.2458495855331421,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.716796875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.29648907482624054,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030731198191642762,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030731198191642762,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2641173452138901,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.340625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3699604392051697,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03301466815173626,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03301466815173626,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.53447438580555,
|
|
"calibration/batch_distribution_entropy": 0.697833452595151,
|
|
"calibration/batch_entropy_100bins": 0.5162966612875538,
|
|
"calibration/batch_entropy_10bins": 0.697833452595151,
|
|
"calibration/batch_entropy_50bins": 0.601463977565162,
|
|
"calibration/batch_uniqueness": 0.7540542644025435,
|
|
"calibration/buffer_distribution_entropy": 0.6562396115815237,
|
|
"calibration/buffer_entropy_100bins": 0.49265615739430785,
|
|
"calibration/buffer_entropy_10bins": 0.6562396115815237,
|
|
"calibration/buffer_entropy_50bins": 0.5747150103358988,
|
|
"calibration/confidence_entropy": 0.361725120029885,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3901462729077969,
|
|
"calibration/mean_confidence": 0.7812206546712639,
|
|
"calibration/prompt_uniqueness": 0.6691434151673705,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.004296875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1216.6,
|
|
"completions/mean_length": 143.82763671875,
|
|
"completions/mean_terminated_length": 137.8384002685547,
|
|
"completions/min_length": 34.0,
|
|
"completions/min_terminated_length": 34.0,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.0032386924140155315,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0098,
|
|
"num_tokens": 68990461.0,
|
|
"reward": 0.6213819026947022,
|
|
"reward_std": 0.20088508129119872,
|
|
"rewards/accuracy_reward": 0.3435546875,
|
|
"rewards/brier_reward": 0.5725475311279297,
|
|
"rewards/format_reward": 0.980078125,
|
|
"rewards/frontier_aurc_reward": -0.007014566846191883,
|
|
"rewards/frontier_coverage_1": 0.06326824426651001,
|
|
"rewards/frontier_coverage_10": 0.06326824426651001,
|
|
"rewards/frontier_coverage_15": 0.06326824426651001,
|
|
"rewards/frontier_coverage_20": 0.06326824426651001,
|
|
"rewards/frontier_coverage_25": 0.06326824426651001,
|
|
"rewards/frontier_coverage_5": 0.06326824426651001,
|
|
"rewards/frontier_ece_reward": -0.046458789124153556,
|
|
"rewards/frontier_entropy_batch_reward": -0.9029202818870544,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2005615234375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.207421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.25139918029308317,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.340625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10028076171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10028076171875,
|
|
"signal/advantage_abs_mean": 0.15537169873714446,
|
|
"signal/advantage_pre_scale_abs_mean": 0.15537169873714446,
|
|
"signal/advantage_pre_scale_std": 0.21755909621715547,
|
|
"signal/advantage_std": 0.21755909621715547,
|
|
"signal/brier_reward/centered_abs_mean": 0.24199655055999755,
|
|
"signal/brier_reward/group_bin_occupancy": 0.828515625,
|
|
"signal/brier_reward/group_std_mean": 0.298342889547348,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030249568819999694,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.030249568819999694,
|
|
"signal/format_reward/centered_abs_mean": 0.03763427734375,
|
|
"signal/format_reward/group_bin_occupancy": 0.18203125,
|
|
"signal/format_reward/group_std_mean": 0.0920264482498169,
|
|
"signal/format_reward/group_zero_std_frac": 0.54375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.018817138671875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.018817138671875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.005011124256998301,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.75546875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.006882566865533591,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.969911868916824e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.969911868916824e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10851092785596847,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.6796875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16966440081596373,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019423455698415637,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019423455698415637,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10851092785596847,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.6796875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16966440081596373,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019423455698415637,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019423455698415637,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10851092785596847,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.6796875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16966440081596373,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019423455698415637,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019423455698415637,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10851092785596847,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.6796875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16966440081596373,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019423455698415637,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019423455698415637,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10851092785596847,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.6796875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16966440081596373,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019423455698415637,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019423455698415637,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10851092785596847,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.6796875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16966440081596373,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019423455698415637,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019423455698415637,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.14095230400562286,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.76015625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.16866419315338135,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.017619038000702858,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.017619038000702858,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.16997582614421844,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.358984375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2987139880657196,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.071875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021246978268027305,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021246978268027305,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6205064937493211,
|
|
"calibration/batch_distribution_entropy": 0.8188591589225596,
|
|
"calibration/batch_entropy_100bins": 0.6092270818068604,
|
|
"calibration/batch_entropy_10bins": 0.8188591589225596,
|
|
"calibration/batch_entropy_50bins": 0.6969438827371384,
|
|
"calibration/batch_uniqueness": 0.8361927117817313,
|
|
"calibration/buffer_distribution_entropy": 0.6856559337312504,
|
|
"calibration/buffer_entropy_100bins": 0.5146458039959066,
|
|
"calibration/buffer_entropy_10bins": 0.6856559337312504,
|
|
"calibration/buffer_entropy_50bins": 0.5978699532757095,
|
|
"calibration/confidence_entropy": 0.4192333208619353,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3946832247035178,
|
|
"calibration/mean_confidence": 0.705443334403485,
|
|
"calibration/prompt_uniqueness": 0.7620859228603916,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00126953125,
|
|
"completions/max_length": 1378.0,
|
|
"completions/max_terminated_length": 926.6,
|
|
"completions/mean_length": 121.5697265625,
|
|
"completions/mean_terminated_length": 119.77201538085937,
|
|
"completions/min_length": 28.2,
|
|
"completions/min_terminated_length": 28.2,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.012476031668484211,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0025,
|
|
"num_tokens": 85168487.0,
|
|
"reward": 0.6544445514678955,
|
|
"reward_std": 0.18224802613258362,
|
|
"rewards/accuracy_reward": 0.36328125,
|
|
"rewards/brier_reward": 0.6255658030509949,
|
|
"rewards/format_reward": 0.99404296875,
|
|
"rewards/frontier_aurc_reward": -0.006063262652605772,
|
|
"rewards/frontier_coverage_1": 0.07632581368088723,
|
|
"rewards/frontier_coverage_10": 0.07632581368088723,
|
|
"rewards/frontier_coverage_15": 0.07632581368088723,
|
|
"rewards/frontier_coverage_20": 0.07632581368088723,
|
|
"rewards/frontier_coverage_25": 0.07632581368088723,
|
|
"rewards/frontier_coverage_5": 0.07632581368088723,
|
|
"rewards/frontier_ece_reward": -0.03705122843384743,
|
|
"rewards/frontier_entropy_batch_reward": -0.8469658613204956,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.188232421875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.2347244828939438,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0941162109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0941162109375,
|
|
"signal/advantage_abs_mean": 0.14390270113945008,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14390270113945008,
|
|
"signal/advantage_pre_scale_std": 0.1975090980529785,
|
|
"signal/advantage_std": 0.1975090980529785,
|
|
"signal/brier_reward/centered_abs_mean": 0.23567027747631072,
|
|
"signal/brier_reward/group_bin_occupancy": 0.867578125,
|
|
"signal/brier_reward/group_std_mean": 0.28802819848060607,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02945878468453884,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02945878468453884,
|
|
"signal/format_reward/centered_abs_mean": 0.011297607421875,
|
|
"signal/format_reward/group_bin_occupancy": 0.144921875,
|
|
"signal/format_reward/group_std_mean": 0.029941194131970404,
|
|
"signal/format_reward/group_zero_std_frac": 0.840625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0056488037109375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0056488037109375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0037031634245067837,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.751953125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005138655751943588,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.628662376897409e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.628662376897409e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1505482792854309,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.77265625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21810686886310576,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002694814093410969,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002694814093410969,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1505482792854309,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.77265625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21810686886310576,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002694814093410969,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002694814093410969,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1505482792854309,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.77265625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21810686886310576,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002694814093410969,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002694814093410969,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1505482792854309,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.77265625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.21810686886310576,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002694814093410969,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002694814093410969,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1505482792854309,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.77265625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.21810686886310576,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002694814093410969,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002694814093410969,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1505482792854309,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.77265625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21810686886310576,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002694814093410969,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002694814093410969,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.1332566112279892,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.82265625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.16948509812355042,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.01665707640349865,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.01665707640349865,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2531877249479294,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.450390625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3905863881111145,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.021875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031648465618491176,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031648465618491176,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6337002880818469,
|
|
"calibration/batch_distribution_entropy": 0.9515234692633457,
|
|
"calibration/batch_entropy_100bins": 0.7790651280496668,
|
|
"calibration/batch_entropy_10bins": 0.9515234692633457,
|
|
"calibration/batch_entropy_50bins": 0.848934936739482,
|
|
"calibration/batch_uniqueness": 0.909145581071251,
|
|
"calibration/buffer_distribution_entropy": 0.7460870471656004,
|
|
"calibration/buffer_entropy_100bins": 0.565529106007607,
|
|
"calibration/buffer_entropy_10bins": 0.7460870471656004,
|
|
"calibration/buffer_entropy_50bins": 0.6492569955197386,
|
|
"calibration/confidence_entropy": 0.49321506704742396,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.29262828029023924,
|
|
"calibration/mean_confidence": 0.5433459062155098,
|
|
"calibration/prompt_uniqueness": 0.8469597941687622,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0021484375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 929.8,
|
|
"completions/mean_length": 118.1986328125,
|
|
"completions/mean_terminated_length": 115.14469146728516,
|
|
"completions/min_length": 38.6,
|
|
"completions/min_terminated_length": 38.6,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.0032025109976530075,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0048,
|
|
"num_tokens": 101423449.0,
|
|
"reward": 0.6895796895027161,
|
|
"reward_std": 0.17854192554950715,
|
|
"rewards/accuracy_reward": 0.35771484375,
|
|
"rewards/brier_reward": 0.6795345783233643,
|
|
"rewards/format_reward": 0.99248046875,
|
|
"rewards/frontier_aurc_reward": -0.005398123059421778,
|
|
"rewards/frontier_coverage_1": 0.10967106521129608,
|
|
"rewards/frontier_coverage_10": 0.10967106521129608,
|
|
"rewards/frontier_coverage_15": 0.10967106521129608,
|
|
"rewards/frontier_coverage_20": 0.10967106521129608,
|
|
"rewards/frontier_coverage_25": 0.10967106521129608,
|
|
"rewards/frontier_coverage_5": 0.10967106521129608,
|
|
"rewards/frontier_ece_reward": -0.026541496440768243,
|
|
"rewards/frontier_entropy_batch_reward": -0.6305931687355042,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.189324951171875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.2379360795021057,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.36875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0946624755859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0946624755859375,
|
|
"signal/advantage_abs_mean": 0.1388890862464905,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1388890862464905,
|
|
"signal/advantage_pre_scale_std": 0.1868252784013748,
|
|
"signal/advantage_std": 0.1868252784013748,
|
|
"signal/brier_reward/centered_abs_mean": 0.2370523989200592,
|
|
"signal/brier_reward/group_bin_occupancy": 0.90390625,
|
|
"signal/brier_reward/group_std_mean": 0.288687926530838,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0296315498650074,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0296315498650074,
|
|
"signal/format_reward/centered_abs_mean": 0.014483642578125,
|
|
"signal/format_reward/group_bin_occupancy": 0.15234375,
|
|
"signal/format_reward/group_std_mean": 0.04018273241817951,
|
|
"signal/format_reward/group_zero_std_frac": 0.78125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0072418212890625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0072418212890625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002838827669620514,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.723828125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004378228541463613,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.0815014401450756e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.0815014401450756e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.23764651417732238,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.908984375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3083926856517792,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004253872437402606,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004253872437402606,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23764651417732238,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.908984375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3083926856517792,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004253872437402606,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004253872437402606,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.23764651417732238,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.908984375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3083926856517792,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004253872437402606,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004253872437402606,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.23764651417732238,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.908984375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3083926856517792,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004253872437402606,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004253872437402606,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.23764651417732238,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.908984375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3083926856517792,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004253872437402606,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004253872437402606,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.23764651417732238,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.908984375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3083926856517792,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004253872437402606,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004253872437402606,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.11303882747888565,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.83671875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.15580815970897674,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.014129853434860706,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.014129853434860706,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4395421028137207,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.62890625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5271062850952148,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.054942762851715087,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.054942762851715087,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.511020845461892,
|
|
"calibration/batch_distribution_entropy": 0.9269066170510761,
|
|
"calibration/batch_entropy_100bins": 0.9245069542553294,
|
|
"calibration/batch_entropy_10bins": 0.9269066170510761,
|
|
"calibration/batch_entropy_50bins": 0.9373367432265832,
|
|
"calibration/batch_uniqueness": 0.9446476489301399,
|
|
"calibration/buffer_distribution_entropy": 0.829412139862835,
|
|
"calibration/buffer_entropy_100bins": 0.6628959346066731,
|
|
"calibration/buffer_entropy_10bins": 0.829412139862835,
|
|
"calibration/buffer_entropy_50bins": 0.7391661237028551,
|
|
"calibration/confidence_entropy": 0.5010959880267044,
|
|
"calibration/coverage@0%": 0.004707438321562174,
|
|
"calibration/coverage@1%": 0.004707438321562174,
|
|
"calibration/coverage@10%": 0.004707438321562174,
|
|
"calibration/coverage@15%": 0.005100365630010111,
|
|
"calibration/coverage@20%": 0.00745639160473512,
|
|
"calibration/coverage@25%": 0.01687046764772599,
|
|
"calibration/coverage@30%": 0.0360861539222358,
|
|
"calibration/coverage@5%": 0.004707438321562174,
|
|
"calibration/ece": 0.19524205952621834,
|
|
"calibration/mean_confidence": 0.3591607106056175,
|
|
"calibration/prompt_uniqueness": 0.8846453539155539,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00185546875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 867.0,
|
|
"completions/mean_length": 117.60078125,
|
|
"completions/mean_terminated_length": 114.96354064941406,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.002222000854089856,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0057,
|
|
"num_tokens": 117737153.0,
|
|
"reward": 0.7422285556793213,
|
|
"reward_std": 0.14107392430305482,
|
|
"rewards/accuracy_reward": 0.387109375,
|
|
"rewards/brier_reward": 0.7211146593093872,
|
|
"rewards/format_reward": 0.99482421875,
|
|
"rewards/frontier_aurc_reward": -0.0045266709290444854,
|
|
"rewards/frontier_coverage_1": 0.13778235018253326,
|
|
"rewards/frontier_coverage_10": 0.13778235018253326,
|
|
"rewards/frontier_coverage_15": 0.13778235018253326,
|
|
"rewards/frontier_coverage_20": 0.13778235018253326,
|
|
"rewards/frontier_coverage_25": 0.13778235018253326,
|
|
"rewards/frontier_coverage_5": 0.13778235018253326,
|
|
"rewards/frontier_ece_reward": -0.0034593752585351466,
|
|
"rewards/frontier_entropy_batch_reward": -0.42529548406600953,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.190576171875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.24057506322860717,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.35625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0952880859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0952880859375,
|
|
"signal/advantage_abs_mean": 0.10944210141897201,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10944210141897201,
|
|
"signal/advantage_pre_scale_std": 0.1517003059387207,
|
|
"signal/advantage_std": 0.1517003059387207,
|
|
"signal/brier_reward/centered_abs_mean": 0.20343652367591858,
|
|
"signal/brier_reward/group_bin_occupancy": 0.880078125,
|
|
"signal/brier_reward/group_std_mean": 0.2546698063611984,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025429565459489822,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.025429565459489822,
|
|
"signal/format_reward/centered_abs_mean": 0.009954833984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.14375,
|
|
"signal/format_reward/group_std_mean": 0.027537884190678596,
|
|
"signal/format_reward/group_zero_std_frac": 0.85,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0049774169921875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0049774169921875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012978114187717437,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72578125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0021451528184115885,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3230824081110767e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3230824081110767e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.317331862449646,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.941796875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3929042756557465,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005680239945650101,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005680239945650101,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.317331862449646,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.941796875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3929042756557465,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005680239945650101,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005680239945650101,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.317331862449646,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.941796875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3929042756557465,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005680239945650101,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005680239945650101,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.317331862449646,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.941796875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3929042756557465,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005680239945650101,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005680239945650101,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.317331862449646,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.941796875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3929042756557465,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005680239945650101,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005680239945650101,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.317331862449646,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.941796875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3929042756557465,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005680239945650101,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005680239945650101,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05593574643135071,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.801953125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08778993785381317,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006991968303918838,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006991968303918838,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.448394775390625,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7671875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5047555208206177,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.056049346923828125,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.056049346923828125,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5883847168275128,
|
|
"calibration/batch_distribution_entropy": 0.91090173474976,
|
|
"calibration/batch_entropy_100bins": 0.931093997601525,
|
|
"calibration/batch_entropy_10bins": 0.91090173474976,
|
|
"calibration/batch_entropy_50bins": 0.9358617585488869,
|
|
"calibration/batch_uniqueness": 0.9426319952686615,
|
|
"calibration/buffer_distribution_entropy": 0.893493430028969,
|
|
"calibration/buffer_entropy_100bins": 0.7499790530020946,
|
|
"calibration/buffer_entropy_10bins": 0.893493430028969,
|
|
"calibration/buffer_entropy_50bins": 0.8144149519247129,
|
|
"calibration/confidence_entropy": 0.5064471270952413,
|
|
"calibration/coverage@0%": 0.002359110808594898,
|
|
"calibration/coverage@1%": 0.002359110808594898,
|
|
"calibration/coverage@10%": 0.002359110808594898,
|
|
"calibration/coverage@15%": 0.002359110808594898,
|
|
"calibration/coverage@20%": 0.005897777021487245,
|
|
"calibration/coverage@25%": 0.005897777021487245,
|
|
"calibration/coverage@30%": 0.006290704329935182,
|
|
"calibration/coverage@5%": 0.002359110808594898,
|
|
"calibration/ece": 0.19177551118694733,
|
|
"calibration/mean_confidence": 0.3377996746346041,
|
|
"calibration/prompt_uniqueness": 0.8836925240634755,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00283203125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 896.0,
|
|
"completions/mean_length": 122.0451171875,
|
|
"completions/mean_terminated_length": 118.02766723632813,
|
|
"completions/min_length": 40.2,
|
|
"completions/min_terminated_length": 40.2,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.0016771440859884024,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0077,
|
|
"num_tokens": 133903567.0,
|
|
"reward": 0.7402719259262085,
|
|
"reward_std": 0.13263684809207915,
|
|
"rewards/accuracy_reward": 0.380078125,
|
|
"rewards/brier_reward": 0.7196099877357482,
|
|
"rewards/format_reward": 0.9953125,
|
|
"rewards/frontier_aurc_reward": -0.0044641831889748575,
|
|
"rewards/frontier_coverage_1": 0.1357348829507828,
|
|
"rewards/frontier_coverage_10": 0.1357348829507828,
|
|
"rewards/frontier_coverage_15": 0.1357348829507828,
|
|
"rewards/frontier_coverage_20": 0.1357348829507828,
|
|
"rewards/frontier_coverage_25": 0.1357348829507828,
|
|
"rewards/frontier_coverage_5": 0.1357348829507828,
|
|
"rewards/frontier_ece_reward": 0.0004683260805904865,
|
|
"rewards/frontier_entropy_batch_reward": -0.4154496967792511,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.174169921875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.203515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.2256518006324768,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.371875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0870849609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0870849609375,
|
|
"signal/advantage_abs_mean": 0.10101482570171356,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10101482570171356,
|
|
"signal/advantage_pre_scale_std": 0.1429665595293045,
|
|
"signal/advantage_std": 0.1429665595293045,
|
|
"signal/brier_reward/centered_abs_mean": 0.19600196480751036,
|
|
"signal/brier_reward/group_bin_occupancy": 0.88515625,
|
|
"signal/brier_reward/group_std_mean": 0.24760811030864716,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024500245600938796,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.024500245600938796,
|
|
"signal/format_reward/centered_abs_mean": 0.00906982421875,
|
|
"signal/format_reward/group_bin_occupancy": 0.143359375,
|
|
"signal/format_reward/group_std_mean": 0.026180195435881615,
|
|
"signal/format_reward/group_zero_std_frac": 0.853125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004534912109375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.004534912109375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013354318216443062,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.773828125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0020624040393158794,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3904228874016555e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3904228874016555e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.3095270454883575,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.940234375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3819129645824432,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005540534015744924,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005540534015744924,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3095270454883575,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.940234375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3819129645824432,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005540534015744924,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005540534015744924,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3095270454883575,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.940234375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3819129645824432,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005540534015744924,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005540534015744924,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3095270454883575,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.940234375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3819129645824432,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005540534015744924,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005540534015744924,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3095270454883575,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.940234375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3819129645824432,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005540534015744924,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005540534015744924,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.3095270454883575,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.940234375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3819129645824432,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005540534015744924,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005540534015744924,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.048538880050182344,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7890625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07725905627012253,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006067360006272793,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006067360006272793,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4226137399673462,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.77734375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4835656762123108,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.052826717495918274,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.052826717495918274,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4476721577377777,
|
|
"calibration/batch_distribution_entropy": 0.9775319646696126,
|
|
"calibration/batch_entropy_100bins": 0.965599725401858,
|
|
"calibration/batch_entropy_10bins": 0.9775319646696126,
|
|
"calibration/batch_entropy_50bins": 0.9741778975790041,
|
|
"calibration/batch_uniqueness": 0.9526775737262507,
|
|
"calibration/buffer_distribution_entropy": 0.9258372964444268,
|
|
"calibration/buffer_entropy_100bins": 0.8048134162077932,
|
|
"calibration/buffer_entropy_10bins": 0.9258372964444268,
|
|
"calibration/buffer_entropy_50bins": 0.8588073303858943,
|
|
"calibration/confidence_entropy": 0.5410896147965197,
|
|
"calibration/coverage@0%": 0.004303763885691263,
|
|
"calibration/coverage@1%": 0.004303763885691263,
|
|
"calibration/coverage@10%": 0.004303763885691263,
|
|
"calibration/coverage@15%": 0.010163138885691262,
|
|
"calibration/coverage@20%": 0.01758654275066191,
|
|
"calibration/coverage@25%": 0.12735369961340698,
|
|
"calibration/coverage@30%": 0.20470204520164229,
|
|
"calibration/coverage@5%": 0.004303763885691263,
|
|
"calibration/ece": 0.21352444508406468,
|
|
"calibration/mean_confidence": 0.48576702979342884,
|
|
"calibration/prompt_uniqueness": 0.8955215311199272,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00244140625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 825.0,
|
|
"completions/mean_length": 122.3205078125,
|
|
"completions/mean_terminated_length": 118.86110992431641,
|
|
"completions/min_length": 46.8,
|
|
"completions/min_terminated_length": 46.8,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.001746510504744947,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0087,
|
|
"num_tokens": 150106561.0,
|
|
"reward": 0.8010232448577881,
|
|
"reward_std": 0.1486440747976303,
|
|
"rewards/accuracy_reward": 0.4798828125,
|
|
"rewards/brier_reward": 0.7014939427375794,
|
|
"rewards/format_reward": 0.9962890625,
|
|
"rewards/frontier_aurc_reward": -0.004155356530100107,
|
|
"rewards/frontier_coverage_1": 0.03323503416031599,
|
|
"rewards/frontier_coverage_10": 0.03323503416031599,
|
|
"rewards/frontier_coverage_15": 0.03323503416031599,
|
|
"rewards/frontier_coverage_20": 0.03323503416031599,
|
|
"rewards/frontier_coverage_25": 0.03323503416031599,
|
|
"rewards/frontier_coverage_5": 0.03323503416031599,
|
|
"rewards/frontier_ece_reward": 0.0009955904446542264,
|
|
"rewards/frontier_entropy_batch_reward": -0.22695176005363465,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17799072265625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.204296875,
|
|
"signal/accuracy_reward/group_std_mean": 0.22953784465789795,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.365625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.088995361328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.088995361328125,
|
|
"signal/advantage_abs_mean": 0.11683495044708252,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11683495044708252,
|
|
"signal/advantage_pre_scale_std": 0.1583361119031906,
|
|
"signal/advantage_std": 0.1583361119031906,
|
|
"signal/brier_reward/centered_abs_mean": 0.2091756820678711,
|
|
"signal/brier_reward/group_bin_occupancy": 0.9421875,
|
|
"signal/brier_reward/group_std_mean": 0.2564647078514099,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026146960258483887,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.026146960258483887,
|
|
"signal/format_reward/centered_abs_mean": 0.00714111328125,
|
|
"signal/format_reward/group_bin_occupancy": 0.13828125,
|
|
"signal/format_reward/group_std_mean": 0.01964699849486351,
|
|
"signal/format_reward/group_zero_std_frac": 0.89375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003570556640625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.003570556640625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002061827527359128,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.815234375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0029503189492970706,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6906712193740535e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6906712193740535e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2580311119556427,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.945703125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.32445969581604006,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004618756845593453,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004618756845593453,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2580311119556427,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.945703125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.32445969581604006,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004618756845593453,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004618756845593453,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2580311119556427,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.945703125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.32445969581604006,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004618756845593453,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004618756845593453,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2580311119556427,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.945703125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.32445969581604006,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004618756845593453,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004618756845593453,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2580311119556427,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.945703125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.32445969581604006,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004618756845593453,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004618756845593453,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2580311119556427,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.945703125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.32445969581604006,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004618756845593453,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004618756845593453,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06224460154771805,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.85703125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0892532080411911,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007780575193464756,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007780575193464756,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31719207763671875,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39413705468177795,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.039649009704589844,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.039649009704589844,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5193675277297729,
|
|
"calibration/batch_distribution_entropy": 0.983820122323354,
|
|
"calibration/batch_entropy_100bins": 0.9716850235861294,
|
|
"calibration/batch_entropy_10bins": 0.983820122323354,
|
|
"calibration/batch_entropy_50bins": 0.9798083767768972,
|
|
"calibration/batch_uniqueness": 0.9539231310533063,
|
|
"calibration/buffer_distribution_entropy": 0.9415060100334243,
|
|
"calibration/buffer_entropy_100bins": 0.8440598741793546,
|
|
"calibration/buffer_entropy_10bins": 0.9415060100334243,
|
|
"calibration/buffer_entropy_50bins": 0.888877610865622,
|
|
"calibration/confidence_entropy": 0.5284500641790459,
|
|
"calibration/coverage@0%": 0.002352179777096941,
|
|
"calibration/coverage@1%": 0.002352179777096941,
|
|
"calibration/coverage@10%": 0.002352179777096941,
|
|
"calibration/coverage@15%": 0.002352179777096941,
|
|
"calibration/coverage@20%": 0.003134958642067587,
|
|
"calibration/coverage@25%": 0.004700516372008879,
|
|
"calibration/coverage@30%": 0.005483295236979524,
|
|
"calibration/coverage@5%": 0.002352179777096941,
|
|
"calibration/ece": 0.21469450707488327,
|
|
"calibration/mean_confidence": 0.5467065595076134,
|
|
"calibration/prompt_uniqueness": 0.8933109723215835,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00185546875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 966.0,
|
|
"completions/mean_length": 125.73544921875,
|
|
"completions/mean_terminated_length": 123.11231536865235,
|
|
"completions/min_length": 50.6,
|
|
"completions/min_terminated_length": 50.6,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.0022161127999424934,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0047,
|
|
"num_tokens": 166415020.0,
|
|
"reward": 0.7875006437301636,
|
|
"reward_std": 0.15233681201934815,
|
|
"rewards/accuracy_reward": 0.44111328125,
|
|
"rewards/brier_reward": 0.6943708896636963,
|
|
"rewards/format_reward": 0.9970703125,
|
|
"rewards/frontier_aurc_reward": -0.004590437188744545,
|
|
"rewards/frontier_coverage_1": 0.05425913706421852,
|
|
"rewards/frontier_coverage_10": 0.05425913706421852,
|
|
"rewards/frontier_coverage_15": 0.05425913706421852,
|
|
"rewards/frontier_coverage_20": 0.05425913706421852,
|
|
"rewards/frontier_coverage_25": 0.05425913706421852,
|
|
"rewards/frontier_coverage_5": 0.05425913706421852,
|
|
"rewards/frontier_ece_reward": -0.0013219955493696035,
|
|
"rewards/frontier_entropy_batch_reward": -0.19174024760723113,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.169244384765625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.196484375,
|
|
"signal/accuracy_reward/group_std_mean": 0.2129174590110779,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.428125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0846221923828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0846221923828125,
|
|
"signal/advantage_abs_mean": 0.122350013256073,
|
|
"signal/advantage_pre_scale_abs_mean": 0.122350013256073,
|
|
"signal/advantage_pre_scale_std": 0.16442006826400757,
|
|
"signal/advantage_std": 0.16442006826400757,
|
|
"signal/brier_reward/centered_abs_mean": 0.21847104728221894,
|
|
"signal/brier_reward/group_bin_occupancy": 0.945703125,
|
|
"signal/brier_reward/group_std_mean": 0.26607994437217714,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027308880910277368,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.027308880910277368,
|
|
"signal/format_reward/centered_abs_mean": 0.00565185546875,
|
|
"signal/format_reward/group_bin_occupancy": 0.1359375,
|
|
"signal/format_reward/group_std_mean": 0.01590019799768925,
|
|
"signal/format_reward/group_zero_std_frac": 0.9125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002825927734375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.002825927734375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002719328412786126,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.8203125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038117329590022565,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.867597672273405e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.867597672273405e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22449846267700196,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.924609375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.29227436184883115,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004018522240221501,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004018522240221501,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22449846267700196,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.924609375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.29227436184883115,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004018522240221501,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004018522240221501,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22449846267700196,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.924609375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29227436184883115,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004018522240221501,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004018522240221501,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22449846267700196,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.924609375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.29227436184883115,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004018522240221501,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004018522240221501,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22449846267700196,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.924609375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.29227436184883115,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004018522240221501,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004018522240221501,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22449846267700196,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.924609375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.29227436184883115,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004018522240221501,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004018522240221501,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.07301094681024552,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.905078125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09825690239667892,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00912636835128069,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00912636835128069,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2845084547996521,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.758203125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3657692790031433,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03556355684995651,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03556355684995651,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"eval_calibration/aurc": 0.6180015396742866,
|
|
"eval_calibration/batch_distribution_entropy": 0.9232840224430083,
|
|
"eval_calibration/batch_entropy_100bins": 0.719649833384205,
|
|
"eval_calibration/batch_entropy_10bins": 0.9232840224430083,
|
|
"eval_calibration/batch_entropy_50bins": 0.8035427782506473,
|
|
"eval_calibration/batch_uniqueness": 0.9052734375,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9484987743799115,
|
|
"eval_calibration/buffer_entropy_100bins": 0.8625570001373941,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9484987743799115,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9029135065796394,
|
|
"eval_calibration/confidence_entropy": 0.5333216449567841,
|
|
"eval_calibration/coverage@0%": 0.0,
|
|
"eval_calibration/coverage@1%": 0.0,
|
|
"eval_calibration/coverage@10%": 0.0,
|
|
"eval_calibration/coverage@15%": 0.0,
|
|
"eval_calibration/coverage@20%": 0.046875,
|
|
"eval_calibration/coverage@25%": 0.046875,
|
|
"eval_calibration/coverage@30%": 0.140625,
|
|
"eval_calibration/coverage@5%": 0.0,
|
|
"eval_calibration/ece": 0.330758367790426,
|
|
"eval_calibration/mean_confidence": 0.5541032170362717,
|
|
"eval_calibration/prompt_uniqueness": 0.9052734375,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 409.75,
|
|
"eval_completions/max_terminated_length": 409.75,
|
|
"eval_completions/mean_length": 132.97144317626953,
|
|
"eval_completions/mean_terminated_length": 132.97144317626953,
|
|
"eval_completions/min_length": 66.0,
|
|
"eval_completions/min_terminated_length": 66.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 166415020.0,
|
|
"eval_reward": 0.6569966375827789,
|
|
"eval_reward_std": 0.23399890586733818,
|
|
"eval_rewards/accuracy_reward": 0.365234375,
|
|
"eval_rewards/brier_reward": 0.703216090798378,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.004859182401560247,
|
|
"eval_rewards/frontier_coverage_1": 0.10959587432444096,
|
|
"eval_rewards/frontier_coverage_10": 0.10959587432444096,
|
|
"eval_rewards/frontier_coverage_15": 0.10959587432444096,
|
|
"eval_rewards/frontier_coverage_20": 0.10959587432444096,
|
|
"eval_rewards/frontier_coverage_25": 0.10959587432444096,
|
|
"eval_rewards/frontier_coverage_5": 0.10959587432444096,
|
|
"eval_rewards/frontier_ece_reward": -0.001649250101763755,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 20.2387,
|
|
"eval_samples_per_second": 24.705,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4493408203125,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.48055653274059296,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22467041015625,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22467041015625,
|
|
"eval_signal/advantage_abs_mean": 0.20993919670581818,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20993919670581818,
|
|
"eval_signal/advantage_pre_scale_std": 0.23163216933608055,
|
|
"eval_signal/advantage_std": 0.23163216933608055,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.21175387874245644,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.96875,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2560478299856186,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026469234842807055,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.026469234842807055,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003475597535725683,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.921875,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004670257214456797,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.22131901764078e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.22131901764078e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.28018152713775635,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.36891133338212967,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005015249014832079,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005015249014832079,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.28018152713775635,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.36891133338212967,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005015249014832079,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005015249014832079,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.28018152713775635,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.36891133338212967,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005015249014832079,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005015249014832079,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.28018152713775635,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.36891133338212967,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005015249014832079,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005015249014832079,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.28018152713775635,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.36891133338212967,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005015249014832079,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005015249014832079,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.28018152713775635,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.36891133338212967,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005015249014832079,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005015249014832079,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0727236233651638,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.875,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.10696529969573021,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009090452920645475,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009090452920645475,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.198,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.49154793221632376,
|
|
"calibration/batch_distribution_entropy": 0.9923982265099717,
|
|
"calibration/batch_entropy_100bins": 0.976761110553309,
|
|
"calibration/batch_entropy_10bins": 0.9923982265099717,
|
|
"calibration/batch_entropy_50bins": 0.9853177330835299,
|
|
"calibration/batch_uniqueness": 0.9559410835975026,
|
|
"calibration/buffer_distribution_entropy": 0.9529318045546697,
|
|
"calibration/buffer_entropy_100bins": 0.8728419282733055,
|
|
"calibration/buffer_entropy_10bins": 0.9529318045546697,
|
|
"calibration/buffer_entropy_50bins": 0.9107275009636199,
|
|
"calibration/confidence_entropy": 0.5137282393272424,
|
|
"calibration/coverage@0%": 0.003908543297455968,
|
|
"calibration/coverage@1%": 0.003908543297455968,
|
|
"calibration/coverage@10%": 0.003908543297455968,
|
|
"calibration/coverage@15%": 0.0050804182974559685,
|
|
"calibration/coverage@20%": 0.009769447162426614,
|
|
"calibration/coverage@25%": 0.010550697162426615,
|
|
"calibration/coverage@30%": 0.021499663649706457,
|
|
"calibration/coverage@5%": 0.003908543297455968,
|
|
"calibration/ece": 0.21824396695551201,
|
|
"calibration/mean_confidence": 0.4844792598613541,
|
|
"calibration/prompt_uniqueness": 0.8977543635373308,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.001171875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 887.8,
|
|
"completions/mean_length": 135.89765625,
|
|
"completions/mean_terminated_length": 134.25655059814454,
|
|
"completions/min_length": 58.0,
|
|
"completions/min_terminated_length": 58.0,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.001891042571514845,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.003,
|
|
"num_tokens": 183043732.0,
|
|
"reward": 0.7944678544998169,
|
|
"reward_std": 0.1392228126525879,
|
|
"rewards/accuracy_reward": 0.44111328125,
|
|
"rewards/brier_reward": 0.6948043942451477,
|
|
"rewards/format_reward": 0.9982421875,
|
|
"rewards/frontier_aurc_reward": -0.004382272064685821,
|
|
"rewards/frontier_coverage_1": 0.06402078047394752,
|
|
"rewards/frontier_coverage_10": 0.06402078047394752,
|
|
"rewards/frontier_coverage_15": 0.06402078047394752,
|
|
"rewards/frontier_coverage_20": 0.06402078047394752,
|
|
"rewards/frontier_coverage_25": 0.06402078047394752,
|
|
"rewards/frontier_coverage_5": 0.06402078047394752,
|
|
"rewards/frontier_ece_reward": 0.0002359504927881062,
|
|
"rewards/frontier_entropy_batch_reward": -0.15109863579273225,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.155535888671875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19375,
|
|
"signal/accuracy_reward/group_std_mean": 0.20017340481281282,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.45,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0777679443359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0777679443359375,
|
|
"signal/advantage_abs_mean": 0.11083731651306153,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11083731651306153,
|
|
"signal/advantage_pre_scale_std": 0.14989208579063415,
|
|
"signal/advantage_std": 0.14989208579063415,
|
|
"signal/brier_reward/centered_abs_mean": 0.22810422778129577,
|
|
"signal/brier_reward/group_bin_occupancy": 0.94296875,
|
|
"signal/brier_reward/group_std_mean": 0.27661994099617004,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02851302847266197,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02851302847266197,
|
|
"signal/format_reward/centered_abs_mean": 0.0033935546875,
|
|
"signal/format_reward/group_bin_occupancy": 0.131640625,
|
|
"signal/format_reward/group_std_mean": 0.009607380395755172,
|
|
"signal/format_reward/group_zero_std_frac": 0.946875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00169677734375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00169677734375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026019237469881774,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.793359375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0037212247960269453,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.657443350879476e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.657443350879476e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.26558775305747984,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.934375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3330970585346222,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004754020553082228,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004754020553082228,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.26558775305747984,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.934375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3330970585346222,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004754020553082228,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004754020553082228,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.26558775305747984,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.934375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3330970585346222,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004754020553082228,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004754020553082228,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.26558775305747984,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.934375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3330970585346222,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004754020553082228,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004754020553082228,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.26558775305747984,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.934375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3330970585346222,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004754020553082228,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004754020553082228,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.26558775305747984,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.934375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3330970585346222,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004754020553082228,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004754020553082228,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06770721971988677,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.89609375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09261107891798019,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008463402464985847,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008463402464985847,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2386895924806595,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.756640625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32064216732978823,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029836199060082436,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029836199060082436,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4491234760798785,
|
|
"calibration/batch_distribution_entropy": 0.98299611256218,
|
|
"calibration/batch_entropy_100bins": 0.9689459410888717,
|
|
"calibration/batch_entropy_10bins": 0.98299611256218,
|
|
"calibration/batch_entropy_50bins": 0.9776301185799596,
|
|
"calibration/batch_uniqueness": 0.9538074206065245,
|
|
"calibration/buffer_distribution_entropy": 0.9623449924153729,
|
|
"calibration/buffer_entropy_100bins": 0.8922897436238332,
|
|
"calibration/buffer_entropy_10bins": 0.9623449924153729,
|
|
"calibration/buffer_entropy_50bins": 0.9257487085635707,
|
|
"calibration/confidence_entropy": 0.49867141502553186,
|
|
"calibration/coverage@0%": 0.005078125,
|
|
"calibration/coverage@1%": 0.005078125,
|
|
"calibration/coverage@10%": 0.005859375,
|
|
"calibration/coverage@15%": 0.009765625,
|
|
"calibration/coverage@20%": 0.0125,
|
|
"calibration/coverage@25%": 0.021875,
|
|
"calibration/coverage@30%": 0.0421875,
|
|
"calibration/coverage@5%": 0.005078125,
|
|
"calibration/ece": 0.18586777873207352,
|
|
"calibration/mean_confidence": 0.4387145557850641,
|
|
"calibration/prompt_uniqueness": 0.891550752796566,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 994.2,
|
|
"completions/max_terminated_length": 709.2,
|
|
"completions/mean_length": 144.47421875,
|
|
"completions/mean_terminated_length": 143.9313934326172,
|
|
"completions/min_length": 58.0,
|
|
"completions/min_terminated_length": 58.0,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.0014471631729975343,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0015,
|
|
"num_tokens": 199337964.0,
|
|
"reward": 0.8112979173660279,
|
|
"reward_std": 0.1323389947414398,
|
|
"rewards/accuracy_reward": 0.47412109375,
|
|
"rewards/brier_reward": 0.7108014822006226,
|
|
"rewards/format_reward": 0.99892578125,
|
|
"rewards/frontier_aurc_reward": -0.003819770412519574,
|
|
"rewards/frontier_coverage_1": 0.06823012800887227,
|
|
"rewards/frontier_coverage_10": 0.06823012800887227,
|
|
"rewards/frontier_coverage_15": 0.06823012800887227,
|
|
"rewards/frontier_coverage_20": 0.06823012800887227,
|
|
"rewards/frontier_coverage_25": 0.06823012800887227,
|
|
"rewards/frontier_coverage_5": 0.06823012800887227,
|
|
"rewards/frontier_ece_reward": 0.008699505100958049,
|
|
"rewards/frontier_entropy_batch_reward": -0.17938159108161927,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.148931884765625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.193359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.19490036964416504,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.453125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0744659423828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0744659423828125,
|
|
"signal/advantage_abs_mean": 0.10477420687675476,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10477420687675476,
|
|
"signal/advantage_pre_scale_std": 0.14257683753967285,
|
|
"signal/advantage_std": 0.14257683753967285,
|
|
"signal/brier_reward/centered_abs_mean": 0.22218222618103028,
|
|
"signal/brier_reward/group_bin_occupancy": 0.919140625,
|
|
"signal/brier_reward/group_std_mean": 0.27116515636444094,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027772778272628786,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.027772778272628786,
|
|
"signal/format_reward/centered_abs_mean": 0.002081298828125,
|
|
"signal/format_reward/group_bin_occupancy": 0.129296875,
|
|
"signal/format_reward/group_std_mean": 0.006076698703691363,
|
|
"signal/format_reward/group_zero_std_frac": 0.965625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002302809851244092,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7859375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003347306279465556,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.122029495192692e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.122029495192692e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2844557523727417,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3530541956424713,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005091757886111737,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005091757886111737,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2844557523727417,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3530541956424713,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005091757886111737,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005091757886111737,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2844557523727417,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3530541956424713,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005091757886111737,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005091757886111737,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2844557523727417,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3530541956424713,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005091757886111737,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005091757886111737,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2844557523727417,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3530541956424713,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005091757886111737,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005091757886111737,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2844557523727417,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3530541956424713,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005091757886111737,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005091757886111737,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.061374531686306,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08481642305850982,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00767181646078825,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00767181646078825,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27003463804721833,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75390625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3499366283416748,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03375432975590229,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03375432975590229,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.38886185437113174,
|
|
"calibration/batch_distribution_entropy": 0.993174546110124,
|
|
"calibration/batch_entropy_100bins": 0.9755285743495123,
|
|
"calibration/batch_entropy_10bins": 0.993174546110124,
|
|
"calibration/batch_entropy_50bins": 0.9853249378435794,
|
|
"calibration/batch_uniqueness": 0.9561890088248373,
|
|
"calibration/buffer_distribution_entropy": 0.9696478610556747,
|
|
"calibration/buffer_entropy_100bins": 0.9073566670205515,
|
|
"calibration/buffer_entropy_10bins": 0.9696478610556747,
|
|
"calibration/buffer_entropy_50bins": 0.9373040055132771,
|
|
"calibration/confidence_entropy": 0.4993493040570547,
|
|
"calibration/coverage@0%": 0.001954656862745098,
|
|
"calibration/coverage@1%": 0.001954656862745098,
|
|
"calibration/coverage@10%": 0.01878440245961398,
|
|
"calibration/coverage@15%": 0.08023254335980967,
|
|
"calibration/coverage@20%": 0.12720311240934729,
|
|
"calibration/coverage@25%": 0.3667257827788649,
|
|
"calibration/coverage@30%": 0.413671875,
|
|
"calibration/coverage@5%": 0.001954656862745098,
|
|
"calibration/ece": 0.23965065800221003,
|
|
"calibration/mean_confidence": 0.4801674712232713,
|
|
"calibration/prompt_uniqueness": 0.8904477757544225,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00068359375,
|
|
"completions/max_length": 1357.2,
|
|
"completions/max_terminated_length": 678.2,
|
|
"completions/mean_length": 154.43896484375,
|
|
"completions/mean_terminated_length": 153.49268188476563,
|
|
"completions/min_length": 62.0,
|
|
"completions/min_terminated_length": 62.0,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.0015919266734272242,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0019,
|
|
"num_tokens": 215951643.0,
|
|
"reward": 0.8326294660568238,
|
|
"reward_std": 0.12973762750625611,
|
|
"rewards/accuracy_reward": 0.519140625,
|
|
"rewards/brier_reward": 0.7076422810554505,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_aurc_reward": -0.003565392177551985,
|
|
"rewards/frontier_coverage_1": 0.030470560118556023,
|
|
"rewards/frontier_coverage_10": 0.030470560118556023,
|
|
"rewards/frontier_coverage_15": 0.030470560118556023,
|
|
"rewards/frontier_coverage_20": 0.030470560118556023,
|
|
"rewards/frontier_coverage_25": 0.030470560118556023,
|
|
"rewards/frontier_coverage_5": 0.030470560118556023,
|
|
"rewards/frontier_ece_reward": 0.01095831673592329,
|
|
"rewards/frontier_entropy_batch_reward": -0.15589092671871185,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.142822265625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19453125,
|
|
"signal/accuracy_reward/group_std_mean": 0.19114138782024384,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.44375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0714111328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0714111328125,
|
|
"signal/advantage_abs_mean": 0.10268731862306595,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10268731862306595,
|
|
"signal/advantage_pre_scale_std": 0.1399999141693115,
|
|
"signal/advantage_std": 0.1399999141693115,
|
|
"signal/brier_reward/centered_abs_mean": 0.22231624722480775,
|
|
"signal/brier_reward/group_bin_occupancy": 0.922265625,
|
|
"signal/brier_reward/group_std_mean": 0.2717791020870209,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02778953090310097,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02778953090310097,
|
|
"signal/format_reward/centered_abs_mean": 0.00189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.12890625,
|
|
"signal/format_reward/group_std_mean": 0.005524271540343762,
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002406009705737233,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7859375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035011733416467905,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.3067572551080954e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.3067572551080954e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2699986696243286,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.919921875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3401759326457977,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004832976032048464,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004832976032048464,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2699986696243286,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.919921875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3401759326457977,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004832976032048464,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004832976032048464,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2699986696243286,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.919921875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3401759326457977,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004832976032048464,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004832976032048464,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2699986696243286,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.919921875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3401759326457977,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004832976032048464,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004832976032048464,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2699986696243286,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.919921875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3401759326457977,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004832976032048464,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004832976032048464,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2699986696243286,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.919921875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3401759326457977,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004832976032048464,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004832976032048464,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06127958670258522,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8984375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08330589979887008,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007659948337823153,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007659948337823153,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24359245598316193,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3275866687297821,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03044905699789524,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03044905699789524,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.37149284629454293,
|
|
"calibration/batch_distribution_entropy": 0.9931569295884003,
|
|
"calibration/batch_entropy_100bins": 0.9783278813046771,
|
|
"calibration/batch_entropy_10bins": 0.9931569295884003,
|
|
"calibration/batch_entropy_50bins": 0.9868018130911993,
|
|
"calibration/batch_uniqueness": 0.9561319831691417,
|
|
"calibration/buffer_distribution_entropy": 0.9742613412980017,
|
|
"calibration/buffer_entropy_100bins": 0.9193829808162708,
|
|
"calibration/buffer_entropy_10bins": 0.9742613412980017,
|
|
"calibration/buffer_entropy_50bins": 0.946153440671097,
|
|
"calibration/confidence_entropy": 0.5044726683622951,
|
|
"calibration/coverage@0%": 0.009770982022946165,
|
|
"calibration/coverage@1%": 0.009770982022946165,
|
|
"calibration/coverage@10%": 0.026184111915314067,
|
|
"calibration/coverage@15%": 0.043773703402593914,
|
|
"calibration/coverage@20%": 0.12471181494858216,
|
|
"calibration/coverage@25%": 0.21463975444629907,
|
|
"calibration/coverage@30%": 0.34204774975058516,
|
|
"calibration/coverage@5%": 0.011333482022946165,
|
|
"calibration/ece": 0.17022807751994243,
|
|
"calibration/mean_confidence": 0.5015854712077894,
|
|
"calibration/prompt_uniqueness": 0.8873234306350446,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1016.2,
|
|
"completions/max_terminated_length": 697.0,
|
|
"completions/mean_length": 160.7646484375,
|
|
"completions/mean_terminated_length": 160.22604675292968,
|
|
"completions/min_length": 67.8,
|
|
"completions/min_terminated_length": 67.8,
|
|
"epoch": 0.224,
|
|
"grad_norm": 0.001383577473461628,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 232751057.0,
|
|
"reward": 0.8240685939788819,
|
|
"reward_std": 0.12576959878206254,
|
|
"rewards/accuracy_reward": 0.480078125,
|
|
"rewards/brier_reward": 0.7404986023902893,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0036113801877945663,
|
|
"rewards/frontier_coverage_1": 0.08505139946937561,
|
|
"rewards/frontier_coverage_10": 0.08505139946937561,
|
|
"rewards/frontier_coverage_15": 0.08505139946937561,
|
|
"rewards/frontier_coverage_20": 0.08505139946937561,
|
|
"rewards/frontier_coverage_25": 0.08505139946937561,
|
|
"rewards/frontier_coverage_5": 0.08505139946937561,
|
|
"rewards/frontier_ece_reward": 0.015613408572971822,
|
|
"rewards/frontier_entropy_batch_reward": -0.15448164641857148,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.13514404296875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.187890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.177346870303154,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.496875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.067572021484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.067572021484375,
|
|
"signal/advantage_abs_mean": 0.09964470565319061,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09964470565319061,
|
|
"signal/advantage_pre_scale_std": 0.13848601579666137,
|
|
"signal/advantage_std": 0.13848601579666137,
|
|
"signal/brier_reward/centered_abs_mean": 0.19449081718921662,
|
|
"signal/brier_reward/group_bin_occupancy": 0.896875,
|
|
"signal/brier_reward/group_std_mean": 0.24308900237083436,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024311352148652078,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.024311352148652078,
|
|
"signal/format_reward/centered_abs_mean": 0.000933837890625,
|
|
"signal/format_reward/group_bin_occupancy": 0.1265625,
|
|
"signal/format_reward/group_std_mean": 0.0024258273653686045,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002517683617770672,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0036455394700169565,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.506653422140516e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.506653422140516e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.23284452259540558,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.909765625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.29805226922035216,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0041679169051349165,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0041679169051349165,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23284452259540558,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.909765625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.29805226922035216,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0041679169051349165,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0041679169051349165,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.23284452259540558,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.909765625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29805226922035216,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0041679169051349165,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0041679169051349165,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.23284452259540558,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.909765625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.29805226922035216,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0041679169051349165,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0041679169051349165,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.23284452259540558,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.909765625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.29805226922035216,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0041679169051349165,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0041679169051349165,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.23284452259540558,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.909765625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.29805226922035216,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0041679169051349165,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0041679169051349165,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0579533688724041,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.900390625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07829077690839767,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007244171109050512,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007244171109050512,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24184339344501496,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.758984375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3229557752609253,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03023042418062687,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03023042418062687,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3843023569473395,
|
|
"calibration/batch_distribution_entropy": 0.9825564335986041,
|
|
"calibration/batch_entropy_100bins": 0.9674868151684599,
|
|
"calibration/batch_entropy_10bins": 0.9825564335986041,
|
|
"calibration/batch_entropy_50bins": 0.9772102320227738,
|
|
"calibration/batch_uniqueness": 0.9530455243124984,
|
|
"calibration/buffer_distribution_entropy": 0.9776003008409667,
|
|
"calibration/buffer_entropy_100bins": 0.9292828890688549,
|
|
"calibration/buffer_entropy_10bins": 0.9776003008409667,
|
|
"calibration/buffer_entropy_50bins": 0.9531789644661673,
|
|
"calibration/confidence_entropy": 0.5097281333912452,
|
|
"calibration/coverage@0%": 0.01525272137964775,
|
|
"calibration/coverage@1%": 0.01525272137964775,
|
|
"calibration/coverage@10%": 0.054781525195694715,
|
|
"calibration/coverage@15%": 0.10409047822896281,
|
|
"calibration/coverage@20%": 0.1984008072407045,
|
|
"calibration/coverage@25%": 0.242578125,
|
|
"calibration/coverage@30%": 0.265234375,
|
|
"calibration/coverage@5%": 0.017209668542074362,
|
|
"calibration/ece": 0.17762837478013666,
|
|
"calibration/mean_confidence": 0.5141673460767282,
|
|
"calibration/prompt_uniqueness": 0.8842985822060353,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 882.2,
|
|
"completions/max_terminated_length": 478.2,
|
|
"completions/mean_length": 172.72275390625,
|
|
"completions/mean_terminated_length": 172.45663757324218,
|
|
"completions/min_length": 72.0,
|
|
"completions/min_terminated_length": 72.0,
|
|
"epoch": 0.24,
|
|
"grad_norm": 0.0014637865824624896,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 249771418.0,
|
|
"reward": 0.8475449323654175,
|
|
"reward_std": 0.126870197057724,
|
|
"rewards/accuracy_reward": 0.54345703125,
|
|
"rewards/brier_reward": 0.7408711910247803,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0031028116587549447,
|
|
"rewards/frontier_coverage_1": 0.043854419514536856,
|
|
"rewards/frontier_coverage_10": 0.043854419514536856,
|
|
"rewards/frontier_coverage_15": 0.043854419514536856,
|
|
"rewards/frontier_coverage_20": 0.043854419514536856,
|
|
"rewards/frontier_coverage_25": 0.043854419514536856,
|
|
"rewards/frontier_coverage_5": 0.043854419514536856,
|
|
"rewards/frontier_ece_reward": 0.018962536379694937,
|
|
"rewards/frontier_entropy_batch_reward": -0.1885848104953766,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.143426513671875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.18838266730308534,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0717132568359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0717132568359375,
|
|
"signal/advantage_abs_mean": 0.10101936310529709,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10101936310529709,
|
|
"signal/advantage_pre_scale_std": 0.14047700464725493,
|
|
"signal/advantage_std": 0.14047700464725493,
|
|
"signal/brier_reward/centered_abs_mean": 0.19066874384880067,
|
|
"signal/brier_reward/group_bin_occupancy": 0.9046875,
|
|
"signal/brier_reward/group_std_mean": 0.23766070902347564,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023833592981100084,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.023833592981100084,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_bin_occupancy": 0.126953125,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002466377941891551,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.798046875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035321788396686315,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4148163578938696e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4148163578938696e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22646748423576354,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.902734375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2924614608287811,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004053767677396536,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004053767677396536,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22646748423576354,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.902734375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2924614608287811,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004053767677396536,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004053767677396536,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22646748423576354,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.902734375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2924614608287811,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004053767677396536,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004053767677396536,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22646748423576354,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.902734375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2924614608287811,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004053767677396536,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004053767677396536,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22646748423576354,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.902734375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2924614608287811,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004053767677396536,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004053767677396536,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22646748423576354,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.902734375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2924614608287811,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004053767677396536,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004053767677396536,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05426667183637619,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.887890625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07379693686962127,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006783333979547024,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006783333979547024,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2799087405204773,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7578125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35980047583580016,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034988592565059665,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034988592565059665,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29483841819292744,
|
|
"calibration/batch_distribution_entropy": 0.9777187935073307,
|
|
"calibration/batch_entropy_100bins": 0.9667154925944665,
|
|
"calibration/batch_entropy_10bins": 0.9777187935073307,
|
|
"calibration/batch_entropy_50bins": 0.9757633762213546,
|
|
"calibration/batch_uniqueness": 0.9522694542284718,
|
|
"calibration/buffer_distribution_entropy": 0.9800587855542565,
|
|
"calibration/buffer_entropy_100bins": 0.9373105642982278,
|
|
"calibration/buffer_entropy_10bins": 0.9800587855542565,
|
|
"calibration/buffer_entropy_50bins": 0.958673701749106,
|
|
"calibration/confidence_entropy": 0.4694770219979284,
|
|
"calibration/coverage@0%": 0.016024798189823875,
|
|
"calibration/coverage@1%": 0.016024798189823875,
|
|
"calibration/coverage@10%": 0.12074134662426614,
|
|
"calibration/coverage@15%": 0.23877048679060664,
|
|
"calibration/coverage@20%": 0.35218704134050877,
|
|
"calibration/coverage@25%": 0.449902917074364,
|
|
"calibration/coverage@30%": 0.5347044704011742,
|
|
"calibration/coverage@5%": 0.016806048189823876,
|
|
"calibration/ece": 0.145108984662015,
|
|
"calibration/mean_confidence": 0.4939348278541888,
|
|
"calibration/prompt_uniqueness": 0.8741655522405047,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 939.4,
|
|
"completions/max_terminated_length": 736.8,
|
|
"completions/mean_length": 176.50556640625,
|
|
"completions/mean_terminated_length": 176.3722412109375,
|
|
"completions/min_length": 82.2,
|
|
"completions/min_terminated_length": 82.2,
|
|
"epoch": 0.256,
|
|
"grad_norm": 0.0011565532768145204,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0003,
|
|
"num_tokens": 266633651.0,
|
|
"reward": 0.8428894639015198,
|
|
"reward_std": 0.11935372054576873,
|
|
"rewards/accuracy_reward": 0.51728515625,
|
|
"rewards/brier_reward": 0.7649436235427857,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.002899883547797799,
|
|
"rewards/frontier_coverage_1": 0.09639933593571186,
|
|
"rewards/frontier_coverage_10": 0.09639933593571186,
|
|
"rewards/frontier_coverage_15": 0.09639933593571186,
|
|
"rewards/frontier_coverage_20": 0.09639933593571186,
|
|
"rewards/frontier_coverage_25": 0.09639933593571186,
|
|
"rewards/frontier_coverage_5": 0.09639933593571186,
|
|
"rewards/frontier_ece_reward": 0.023540638387203217,
|
|
"rewards/frontier_entropy_batch_reward": -0.19457639753818512,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.130194091796875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.186328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.17149793207645417,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.509375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0650970458984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0650970458984375,
|
|
"signal/advantage_abs_mean": 0.0930386334657669,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0930386334657669,
|
|
"signal/advantage_pre_scale_std": 0.13454234898090361,
|
|
"signal/advantage_std": 0.13454234898090361,
|
|
"signal/brier_reward/centered_abs_mean": 0.17940108776092528,
|
|
"signal/brier_reward/group_bin_occupancy": 0.879296875,
|
|
"signal/brier_reward/group_std_mean": 0.2269110530614853,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02242513597011566,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02242513597011566,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_bin_occupancy": 0.12734375,
|
|
"signal/format_reward/group_std_mean": 0.0033145629800856113,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024126087315380572,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.773046875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003546612523496151,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.318569772294722e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.318569772294722e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22456566095352173,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.897265625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.28588892221450807,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004019725229591131,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004019725229591131,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22456566095352173,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.897265625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.28588892221450807,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004019725229591131,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004019725229591131,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22456566095352173,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.897265625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.28588892221450807,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004019725229591131,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004019725229591131,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22456566095352173,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.897265625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.28588892221450807,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004019725229591131,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004019725229591131,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22456566095352173,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.897265625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.28588892221450807,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004019725229591131,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004019725229591131,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22456566095352173,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.897265625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.28588892221450807,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004019725229591131,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004019725229591131,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.050846466422080995,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.88046875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06863305419683456,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006355808302760124,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006355808302760124,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27906052470207215,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74765625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3593753814697266,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03488256558775902,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03488256558775902,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.396818313806098,
|
|
"calibration/batch_distribution_entropy": 0.9904097057091761,
|
|
"calibration/batch_entropy_100bins": 0.9737173977651677,
|
|
"calibration/batch_entropy_10bins": 0.9904097057091761,
|
|
"calibration/batch_entropy_50bins": 0.9832208433275686,
|
|
"calibration/batch_uniqueness": 0.9558441162109375,
|
|
"calibration/buffer_distribution_entropy": 0.9823054908036963,
|
|
"calibration/buffer_entropy_100bins": 0.9438449914736629,
|
|
"calibration/buffer_entropy_10bins": 0.9823054908036963,
|
|
"calibration/buffer_entropy_50bins": 0.9631353066861775,
|
|
"calibration/confidence_entropy": 0.48614395905828134,
|
|
"calibration/coverage@0%": 0.010546875,
|
|
"calibration/coverage@1%": 0.010546875,
|
|
"calibration/coverage@10%": 0.047265625,
|
|
"calibration/coverage@15%": 0.06640625,
|
|
"calibration/coverage@20%": 0.16875,
|
|
"calibration/coverage@25%": 0.22734375,
|
|
"calibration/coverage@30%": 0.33125,
|
|
"calibration/coverage@5%": 0.01328125,
|
|
"calibration/ece": 0.14752605867831683,
|
|
"calibration/mean_confidence": 0.49425239867748705,
|
|
"calibration/prompt_uniqueness": 0.879052734375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 687.0,
|
|
"completions/max_terminated_length": 687.0,
|
|
"completions/mean_length": 186.03525390625,
|
|
"completions/mean_terminated_length": 186.03525390625,
|
|
"completions/min_length": 78.2,
|
|
"completions/min_terminated_length": 78.2,
|
|
"epoch": 0.272,
|
|
"grad_norm": 0.0011228998191654682,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 283504348.0,
|
|
"reward": 0.8343194007873536,
|
|
"reward_std": 0.1147305577993393,
|
|
"rewards/accuracy_reward": 0.49560546875,
|
|
"rewards/brier_reward": 0.7591516852378846,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0030454121064394713,
|
|
"rewards/frontier_coverage_1": 0.1056124085560441,
|
|
"rewards/frontier_coverage_10": 0.1056124085560441,
|
|
"rewards/frontier_coverage_15": 0.1056124085560441,
|
|
"rewards/frontier_coverage_20": 0.1056124085560441,
|
|
"rewards/frontier_coverage_25": 0.1056124085560441,
|
|
"rewards/frontier_coverage_5": 0.1056124085560441,
|
|
"rewards/frontier_ece_reward": 0.019984208419919013,
|
|
"rewards/frontier_entropy_batch_reward": -0.17652736306190492,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.127191162109375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.183984375,
|
|
"signal/accuracy_reward/group_std_mean": 0.16545215547084807,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.528125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0635955810546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0635955810546875,
|
|
"signal/advantage_abs_mean": 0.09112635999917984,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09112635999917984,
|
|
"signal/advantage_pre_scale_std": 0.1298075333237648,
|
|
"signal/advantage_std": 0.1298075333237648,
|
|
"signal/brier_reward/centered_abs_mean": 0.17884210646152496,
|
|
"signal/brier_reward/group_bin_occupancy": 0.869140625,
|
|
"signal/brier_reward/group_std_mean": 0.22582717537879943,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02235526330769062,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02235526330769062,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023218464106321336,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7765625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003383269626647234,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1561049147276205e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1561049147276205e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22928011119365693,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.894921875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2927806079387665,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004104113671928644,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004104113671928644,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22928011119365693,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.894921875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2927806079387665,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004104113671928644,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004104113671928644,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22928011119365693,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.894921875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2927806079387665,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004104113671928644,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004104113671928644,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22928011119365693,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.894921875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2927806079387665,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004104113671928644,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004104113671928644,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22928011119365693,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.894921875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2927806079387665,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004104113671928644,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004104113671928644,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22928011119365693,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.894921875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2927806079387665,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004104113671928644,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004104113671928644,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04553831294178963,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0625507190823555,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005692289117723704,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005692289117723704,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.262398362159729,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72890625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34030004143714904,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032799795269966125,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032799795269966125,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3438739028381422,
|
|
"calibration/batch_distribution_entropy": 0.9872127090769016,
|
|
"calibration/batch_entropy_100bins": 0.9745401927062021,
|
|
"calibration/batch_entropy_10bins": 0.9872127090769016,
|
|
"calibration/batch_entropy_50bins": 0.9833082642512846,
|
|
"calibration/batch_uniqueness": 0.955206298828125,
|
|
"calibration/buffer_distribution_entropy": 0.9842534881328724,
|
|
"calibration/buffer_entropy_100bins": 0.9495188377364201,
|
|
"calibration/buffer_entropy_10bins": 0.9842534881328724,
|
|
"calibration/buffer_entropy_50bins": 0.9670404366119122,
|
|
"calibration/confidence_entropy": 0.4957282055397302,
|
|
"calibration/coverage@0%": 0.0046875,
|
|
"calibration/coverage@1%": 0.0046875,
|
|
"calibration/coverage@10%": 0.06484375,
|
|
"calibration/coverage@15%": 0.106640625,
|
|
"calibration/coverage@20%": 0.14296875,
|
|
"calibration/coverage@25%": 0.212890625,
|
|
"calibration/coverage@30%": 0.2953125,
|
|
"calibration/coverage@5%": 0.01953125,
|
|
"calibration/ece": 0.13413243536778918,
|
|
"calibration/mean_confidence": 0.5377243429204241,
|
|
"calibration/prompt_uniqueness": 0.878662109375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 754.2,
|
|
"completions/max_terminated_length": 565.8,
|
|
"completions/mean_length": 182.9107421875,
|
|
"completions/mean_terminated_length": 182.64695739746094,
|
|
"completions/min_length": 80.4,
|
|
"completions/min_terminated_length": 80.4,
|
|
"epoch": 0.288,
|
|
"grad_norm": 0.0012018464040011168,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 300335530.0,
|
|
"reward": 0.8439226388931275,
|
|
"reward_std": 0.11675801277160644,
|
|
"rewards/accuracy_reward": 0.51513671875,
|
|
"rewards/brier_reward": 0.7593789458274841,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.0030923429410904648,
|
|
"rewards/frontier_coverage_1": 0.10312287509441376,
|
|
"rewards/frontier_coverage_10": 0.10312287509441376,
|
|
"rewards/frontier_coverage_15": 0.10312287509441376,
|
|
"rewards/frontier_coverage_20": 0.10312287509441376,
|
|
"rewards/frontier_coverage_25": 0.10312287509441376,
|
|
"rewards/frontier_coverage_5": 0.10312287509441376,
|
|
"rewards/frontier_ece_reward": 0.01944323191419244,
|
|
"rewards/frontier_entropy_batch_reward": -0.1730232924222946,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.134979248046875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.17875251770019532,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0674896240234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0674896240234375,
|
|
"signal/advantage_abs_mean": 0.0906929224729538,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0906929224729538,
|
|
"signal/advantage_pre_scale_std": 0.13175700902938842,
|
|
"signal/advantage_std": 0.13175700902938842,
|
|
"signal/brier_reward/centered_abs_mean": 0.17308151721954346,
|
|
"signal/brier_reward/group_bin_occupancy": 0.875,
|
|
"signal/brier_reward/group_std_mean": 0.2171693116426468,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021635189652442932,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021635189652442932,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_bin_occupancy": 0.128125,
|
|
"signal/format_reward/group_std_mean": 0.004419417353346944,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002408099686726928,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.76796875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035015761386603117,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.310498406994157e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.310498406994157e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21902143955230713,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.885546875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27995782494544985,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003920483542606235,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003920483542606235,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21902143955230713,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.885546875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27995782494544985,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003920483542606235,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003920483542606235,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21902143955230713,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.885546875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27995782494544985,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003920483542606235,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003920483542606235,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21902143955230713,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.885546875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27995782494544985,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003920483542606235,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003920483542606235,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21902143955230713,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.885546875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27995782494544985,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003920483542606235,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003920483542606235,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21902143955230713,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.885546875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27995782494544985,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003920483542606235,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003920483542606235,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.043396206200122835,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05951143801212311,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005424525775015354,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005424525775015354,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2536126673221588,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7515625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.330656635761261,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03170158341526985,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03170158341526985,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30520045908732824,
|
|
"calibration/batch_distribution_entropy": 0.9809410439481454,
|
|
"calibration/batch_entropy_100bins": 0.9710889487139797,
|
|
"calibration/batch_entropy_10bins": 0.9809410439481454,
|
|
"calibration/batch_entropy_50bins": 0.979682041413058,
|
|
"calibration/batch_uniqueness": 0.9534674290791777,
|
|
"calibration/buffer_distribution_entropy": 0.9852726079888707,
|
|
"calibration/buffer_entropy_100bins": 0.9542494347166681,
|
|
"calibration/buffer_entropy_10bins": 0.9852726079888707,
|
|
"calibration/buffer_entropy_50bins": 0.970168674026028,
|
|
"calibration/confidence_entropy": 0.487046948757095,
|
|
"calibration/coverage@0%": 0.00703660102739726,
|
|
"calibration/coverage@1%": 0.00703660102739726,
|
|
"calibration/coverage@10%": 0.06920177959882583,
|
|
"calibration/coverage@15%": 0.11181124633072406,
|
|
"calibration/coverage@20%": 0.20913420376712327,
|
|
"calibration/coverage@25%": 0.3654484160958904,
|
|
"calibration/coverage@30%": 0.5100178877201565,
|
|
"calibration/coverage@5%": 0.00703660102739726,
|
|
"calibration/ece": 0.11105027567237662,
|
|
"calibration/mean_confidence": 0.5574284681401743,
|
|
"calibration/prompt_uniqueness": 0.8712910948881373,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 663.8,
|
|
"completions/max_terminated_length": 663.8,
|
|
"completions/mean_length": 184.992578125,
|
|
"completions/mean_terminated_length": 184.992578125,
|
|
"completions/min_length": 77.6,
|
|
"completions/min_terminated_length": 77.6,
|
|
"epoch": 0.304,
|
|
"grad_norm": 0.0013152319006621838,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 317159806.0,
|
|
"reward": 0.8400702953338623,
|
|
"reward_std": 0.11409472972154618,
|
|
"rewards/accuracy_reward": 0.50537109375,
|
|
"rewards/brier_reward": 0.7638264536857605,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0030908068176358936,
|
|
"rewards/frontier_coverage_1": 0.1076532706618309,
|
|
"rewards/frontier_coverage_10": 0.1076532706618309,
|
|
"rewards/frontier_coverage_15": 0.1076532706618309,
|
|
"rewards/frontier_coverage_20": 0.1076532706618309,
|
|
"rewards/frontier_coverage_25": 0.1076532706618309,
|
|
"rewards/frontier_coverage_5": 0.1076532706618309,
|
|
"rewards/frontier_ece_reward": 0.019258670136332513,
|
|
"rewards/frontier_entropy_batch_reward": -0.1748884290456772,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.126800537109375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.186328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1693242758512497,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.509375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0634002685546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0634002685546875,
|
|
"signal/advantage_abs_mean": 0.08780478239059449,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08780478239059449,
|
|
"signal/advantage_pre_scale_std": 0.1284501165151596,
|
|
"signal/advantage_std": 0.1284501165151596,
|
|
"signal/brier_reward/centered_abs_mean": 0.1634564906358719,
|
|
"signal/brier_reward/group_bin_occupancy": 0.869140625,
|
|
"signal/brier_reward/group_std_mean": 0.207411727309227,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020432061329483987,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020432061329483987,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002385811135172844,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.784375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0034592232666909696,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2706017120508476e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2706017120508476e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20289005041122438,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.879296875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26228512823581696,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003631731867790222,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003631731867790222,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20289005041122438,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.879296875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26228512823581696,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003631731867790222,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003631731867790222,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20289005041122438,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.879296875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26228512823581696,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003631731867790222,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003631731867790222,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20289005041122438,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.879296875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.26228512823581696,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003631731867790222,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003631731867790222,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20289005041122438,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.879296875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.26228512823581696,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003631731867790222,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003631731867790222,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20289005041122438,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.879296875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.26228512823581696,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003631731867790222,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003631731867790222,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.041925042122602466,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.84453125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05757189467549324,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005240630265325308,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005240630265325308,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25924491286277773,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.754296875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3380768716335297,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032405614107847217,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032405614107847217,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2647507481106945,
|
|
"calibration/batch_distribution_entropy": 0.9905705255561216,
|
|
"calibration/batch_entropy_100bins": 0.9749278642951309,
|
|
"calibration/batch_entropy_10bins": 0.9905705255561216,
|
|
"calibration/batch_entropy_50bins": 0.9843588547973605,
|
|
"calibration/batch_uniqueness": 0.9554248612111863,
|
|
"calibration/buffer_distribution_entropy": 0.9859378110837035,
|
|
"calibration/buffer_entropy_100bins": 0.9583101311689097,
|
|
"calibration/buffer_entropy_10bins": 0.9859378110837035,
|
|
"calibration/buffer_entropy_50bins": 0.9726730927304548,
|
|
"calibration/confidence_entropy": 0.48981557023424893,
|
|
"calibration/coverage@0%": 0.0453491927592955,
|
|
"calibration/coverage@1%": 0.0453491927592955,
|
|
"calibration/coverage@10%": 0.24703170865949117,
|
|
"calibration/coverage@15%": 0.34710127201565555,
|
|
"calibration/coverage@20%": 0.41707207069471625,
|
|
"calibration/coverage@25%": 0.49878913894324856,
|
|
"calibration/coverage@30%": 0.572265625,
|
|
"calibration/coverage@5%": 0.09306124633072407,
|
|
"calibration/ece": 0.1539305622574378,
|
|
"calibration/mean_confidence": 0.5290979156854486,
|
|
"calibration/prompt_uniqueness": 0.8721267620805151,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 890.6,
|
|
"completions/max_terminated_length": 481.2,
|
|
"completions/mean_length": 185.70908203125,
|
|
"completions/mean_terminated_length": 185.3141632080078,
|
|
"completions/min_length": 92.6,
|
|
"completions/min_terminated_length": 92.6,
|
|
"epoch": 0.32,
|
|
"grad_norm": 0.0009060048614628613,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 334150171.0,
|
|
"reward": 0.8524771690368652,
|
|
"reward_std": 0.09913994669914246,
|
|
"rewards/accuracy_reward": 0.5279296875,
|
|
"rewards/brier_reward": 0.7727201581001282,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.00271282319445163,
|
|
"rewards/frontier_coverage_1": 0.09441364500671626,
|
|
"rewards/frontier_coverage_10": 0.09441364500671626,
|
|
"rewards/frontier_coverage_15": 0.09441364500671626,
|
|
"rewards/frontier_coverage_20": 0.09441364500671626,
|
|
"rewards/frontier_coverage_25": 0.09441364500671626,
|
|
"rewards/frontier_coverage_5": 0.09441364500671626,
|
|
"rewards/frontier_ece_reward": 0.02004805374890566,
|
|
"rewards/frontier_entropy_batch_reward": -0.16383886635303496,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0960205078125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.178125,
|
|
"signal/accuracy_reward/group_std_mean": 0.13594979792833328,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.575,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04801025390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04801025390625,
|
|
"signal/advantage_abs_mean": 0.07516542375087738,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07516542375087738,
|
|
"signal/advantage_pre_scale_std": 0.11188042908906937,
|
|
"signal/advantage_std": 0.11188042908906937,
|
|
"signal/brier_reward/centered_abs_mean": 0.15651972889900206,
|
|
"signal/brier_reward/group_bin_occupancy": 0.848828125,
|
|
"signal/brier_reward/group_std_mean": 0.20049535632133483,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019564966112375258,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019564966112375258,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_bin_occupancy": 0.1265625,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002122000069357455,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.769921875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003134680772200227,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.79838005756028e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.79838005756028e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19597499668598176,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.25266251862049105,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035079522524029015,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035079522524029015,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19597499668598176,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.25266251862049105,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035079522524029015,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035079522524029015,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19597499668598176,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.25266251862049105,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035079522524029015,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035079522524029015,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19597499668598176,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.25266251862049105,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035079522524029015,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035079522524029015,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19597499668598176,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.25266251862049105,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035079522524029015,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035079522524029015,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19597499668598176,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.25266251862049105,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035079522524029015,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035079522524029015,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.038548742234706876,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.83828125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05287352129817009,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0048185927793383595,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0048185927793383595,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24952644407749175,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.740234375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32922094464302065,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03119080550968647,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03119080550968647,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"eval_calibration/aurc": 0.5577346842025488,
|
|
"eval_calibration/batch_distribution_entropy": 0.9293753890186642,
|
|
"eval_calibration/batch_entropy_100bins": 0.7052278361140917,
|
|
"eval_calibration/batch_entropy_10bins": 0.9293753890186642,
|
|
"eval_calibration/batch_entropy_50bins": 0.7897002923025568,
|
|
"eval_calibration/batch_uniqueness": 0.8984375,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9865955502836785,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9606136359667189,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9865955502836785,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9741520355969424,
|
|
"eval_calibration/confidence_entropy": 0.49245299957964217,
|
|
"eval_calibration/coverage@0%": 0.046875,
|
|
"eval_calibration/coverage@1%": 0.046875,
|
|
"eval_calibration/coverage@10%": 0.046875,
|
|
"eval_calibration/coverage@15%": 0.046875,
|
|
"eval_calibration/coverage@20%": 0.046875,
|
|
"eval_calibration/coverage@25%": 0.0625,
|
|
"eval_calibration/coverage@30%": 0.0625,
|
|
"eval_calibration/coverage@5%": 0.046875,
|
|
"eval_calibration/ece": 0.2602928906962808,
|
|
"eval_calibration/mean_confidence": 0.4507762260644716,
|
|
"eval_calibration/prompt_uniqueness": 0.8984375,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 387.5,
|
|
"eval_completions/max_terminated_length": 387.5,
|
|
"eval_completions/mean_length": 186.24878692626953,
|
|
"eval_completions/mean_terminated_length": 186.24878692626953,
|
|
"eval_completions/min_length": 94.5,
|
|
"eval_completions/min_terminated_length": 94.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 334150171.0,
|
|
"eval_reward": 0.6920952647924423,
|
|
"eval_reward_std": 0.21343515813350677,
|
|
"eval_rewards/accuracy_reward": 0.400390625,
|
|
"eval_rewards/brier_reward": 0.76617431640625,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0032768649398349226,
|
|
"eval_rewards/frontier_coverage_1": 0.17960688099265099,
|
|
"eval_rewards/frontier_coverage_10": 0.17960688099265099,
|
|
"eval_rewards/frontier_coverage_15": 0.17960688099265099,
|
|
"eval_rewards/frontier_coverage_20": 0.17960688099265099,
|
|
"eval_rewards/frontier_coverage_25": 0.17960688099265099,
|
|
"eval_rewards/frontier_coverage_5": 0.17960688099265099,
|
|
"eval_rewards/frontier_ece_reward": 0.015176349552348256,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 20.5399,
|
|
"eval_samples_per_second": 24.343,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4666748046875,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49022945761680603,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23333740234375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23333740234375,
|
|
"eval_signal/advantage_abs_mean": 0.19233160465955734,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.19233160465955734,
|
|
"eval_signal/advantage_pre_scale_std": 0.21115415170788765,
|
|
"eval_signal/advantage_std": 0.21115415170788765,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.20825786143541336,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.9296875,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2583780698478222,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02603223267942667,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02603223267942667,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002897722239140421,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.8359375,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0046058918233029544,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.186922862776555e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.186922862776555e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.39053118973970413,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.47852831333875656,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0069905080599710345,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0069905080599710345,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.39053118973970413,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.47852831333875656,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0069905080599710345,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0069905080599710345,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.39053118973970413,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.47852831333875656,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0069905080599710345,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0069905080599710345,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.39053118973970413,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.47852831333875656,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0069905080599710345,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0069905080599710345,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.39053118973970413,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.47852831333875656,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0069905080599710345,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0069905080599710345,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.39053118973970413,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.47852831333875656,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0069905080599710345,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0069905080599710345,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.03973545506596565,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.7734375,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.06233951635658741,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0049669318832457066,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0049669318832457066,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.195,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3080561844853652,
|
|
"calibration/batch_distribution_entropy": 0.9775665099409345,
|
|
"calibration/batch_entropy_100bins": 0.9680939529103568,
|
|
"calibration/batch_entropy_10bins": 0.9775665099409345,
|
|
"calibration/batch_entropy_50bins": 0.9760559175224444,
|
|
"calibration/batch_uniqueness": 0.953204345703125,
|
|
"calibration/buffer_distribution_entropy": 0.9886758278700819,
|
|
"calibration/buffer_entropy_100bins": 0.9646465788301135,
|
|
"calibration/buffer_entropy_10bins": 0.9886758278700819,
|
|
"calibration/buffer_entropy_50bins": 0.9770686160222896,
|
|
"calibration/confidence_entropy": 0.5016109504275073,
|
|
"calibration/coverage@0%": 0.005078125,
|
|
"calibration/coverage@1%": 0.005078125,
|
|
"calibration/coverage@10%": 0.04765625,
|
|
"calibration/coverage@15%": 0.087109375,
|
|
"calibration/coverage@20%": 0.164453125,
|
|
"calibration/coverage@25%": 0.394921875,
|
|
"calibration/coverage@30%": 0.550390625,
|
|
"calibration/coverage@5%": 0.005078125,
|
|
"calibration/ece": 0.14772412802918822,
|
|
"calibration/mean_confidence": 0.44700384713572605,
|
|
"calibration/prompt_uniqueness": 0.86708984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 693.6,
|
|
"completions/max_terminated_length": 462.4,
|
|
"completions/mean_length": 185.40810546875,
|
|
"completions/mean_terminated_length": 185.27640075683593,
|
|
"completions/min_length": 85.0,
|
|
"completions/min_terminated_length": 85.0,
|
|
"epoch": 0.336,
|
|
"grad_norm": 0.0010472203139215708,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 350771182.0,
|
|
"reward": 0.8507241845130921,
|
|
"reward_std": 0.10451295822858811,
|
|
"rewards/accuracy_reward": 0.53642578125,
|
|
"rewards/brier_reward": 0.7645717978477478,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0025647633709013464,
|
|
"rewards/frontier_coverage_1": 0.0841044221073389,
|
|
"rewards/frontier_coverage_10": 0.0841044221073389,
|
|
"rewards/frontier_coverage_15": 0.0841044221073389,
|
|
"rewards/frontier_coverage_20": 0.0841044221073389,
|
|
"rewards/frontier_coverage_25": 0.0841044221073389,
|
|
"rewards/frontier_coverage_5": 0.0841044221073389,
|
|
"rewards/frontier_ece_reward": 0.017082036286592484,
|
|
"rewards/frontier_entropy_batch_reward": -0.1930681586265564,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.114898681640625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.18046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1537907287478447,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.55625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0574493408203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0574493408203125,
|
|
"signal/advantage_abs_mean": 0.08221976161003113,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08221976161003113,
|
|
"signal/advantage_pre_scale_std": 0.11908840835094452,
|
|
"signal/advantage_std": 0.11908840835094452,
|
|
"signal/brier_reward/centered_abs_mean": 0.15783025622367858,
|
|
"signal/brier_reward/group_bin_occupancy": 0.868359375,
|
|
"signal/brier_reward/group_std_mean": 0.1996555894613266,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019728782027959822,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019728782027959822,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001953614945523441,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.771875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0028803437016904354,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.496970675769262e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.496970675769262e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20813391208648682,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2666136801242828,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003725596936419606,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003725596936419606,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20813391208648682,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2666136801242828,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003725596936419606,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003725596936419606,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20813391208648682,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2666136801242828,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003725596936419606,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003725596936419606,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20813391208648682,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2666136801242828,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003725596936419606,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003725596936419606,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20813391208648682,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2666136801242828,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003725596936419606,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003725596936419606,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20813391208648682,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2666136801242828,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003725596936419606,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003725596936419606,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.033592797070741656,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.826171875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0477225124835968,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004199099633842707,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004199099633842707,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27311921715736387,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.740625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35031378269195557,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034139902144670484,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034139902144670484,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3112573154501758,
|
|
"calibration/batch_distribution_entropy": 0.9691682173397554,
|
|
"calibration/batch_entropy_100bins": 0.9620743358290158,
|
|
"calibration/batch_entropy_10bins": 0.9691682173397554,
|
|
"calibration/batch_entropy_50bins": 0.968651729542121,
|
|
"calibration/batch_uniqueness": 0.9505657025562766,
|
|
"calibration/buffer_distribution_entropy": 0.9942777995898278,
|
|
"calibration/buffer_entropy_100bins": 0.9773292841249249,
|
|
"calibration/buffer_entropy_10bins": 0.9942777995898278,
|
|
"calibration/buffer_entropy_50bins": 0.9859843966922479,
|
|
"calibration/confidence_entropy": 0.47586445065632177,
|
|
"calibration/coverage@0%": 0.03093057514101531,
|
|
"calibration/coverage@1%": 0.03093057514101531,
|
|
"calibration/coverage@10%": 0.19166808761175705,
|
|
"calibration/coverage@15%": 0.32717660608764054,
|
|
"calibration/coverage@20%": 0.3968004903159894,
|
|
"calibration/coverage@25%": 0.4562750224233529,
|
|
"calibration/coverage@30%": 0.5329890749107863,
|
|
"calibration/coverage@5%": 0.09618102720540271,
|
|
"calibration/ece": 0.1232984263006351,
|
|
"calibration/mean_confidence": 0.44606582673910006,
|
|
"calibration/prompt_uniqueness": 0.8618793594725546,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 982.8,
|
|
"completions/max_terminated_length": 697.4,
|
|
"completions/mean_length": 189.2787109375,
|
|
"completions/mean_terminated_length": 188.88477478027343,
|
|
"completions/min_length": 89.0,
|
|
"completions/min_terminated_length": 89.0,
|
|
"epoch": 0.352,
|
|
"grad_norm": 0.0011104086879640818,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 367969812.0,
|
|
"reward": 0.8267355799674988,
|
|
"reward_std": 0.10339633971452714,
|
|
"rewards/accuracy_reward": 0.4732421875,
|
|
"rewards/brier_reward": 0.7793355941772461,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0028707799036055805,
|
|
"rewards/frontier_coverage_1": 0.13734577894210814,
|
|
"rewards/frontier_coverage_10": 0.13734577894210814,
|
|
"rewards/frontier_coverage_15": 0.13734577894210814,
|
|
"rewards/frontier_coverage_20": 0.13734577894210814,
|
|
"rewards/frontier_coverage_25": 0.13734577894210814,
|
|
"rewards/frontier_coverage_5": 0.13734577894210814,
|
|
"rewards/frontier_ece_reward": 0.01663174610584974,
|
|
"rewards/frontier_entropy_batch_reward": -0.19108545184135436,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10980224609375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.14368323981761932,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.054901123046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.054901123046875,
|
|
"signal/advantage_abs_mean": 0.08111239075660706,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08111239075660706,
|
|
"signal/advantage_pre_scale_std": 0.1185634657740593,
|
|
"signal/advantage_std": 0.1185634657740593,
|
|
"signal/brier_reward/centered_abs_mean": 0.15005984008312226,
|
|
"signal/brier_reward/group_bin_occupancy": 0.854296875,
|
|
"signal/brier_reward/group_std_mean": 0.19159983992576599,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018757480010390282,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018757480010390282,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_bin_occupancy": 0.1265625,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021185804391279815,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0031617959029972553,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.792258794419467e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.792258794419467e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20387679040431977,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.888671875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2581924706697464,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003649394493550062,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003649394493550062,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20387679040431977,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.888671875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2581924706697464,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003649394493550062,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003649394493550062,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20387679040431977,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.888671875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2581924706697464,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003649394493550062,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003649394493550062,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20387679040431977,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.888671875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2581924706697464,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003649394493550062,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003649394493550062,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20387679040431977,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.888671875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2581924706697464,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003649394493550062,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003649394493550062,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20387679040431977,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.888671875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2581924706697464,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003649394493550062,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003649394493550062,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.030297876521945,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.841015625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04242234602570534,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003787234565243125,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003787234565243125,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2668303608894348,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.744140625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34631708860397337,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03335379511117935,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03335379511117935,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3787179681778446,
|
|
"calibration/batch_distribution_entropy": 0.9810928602094272,
|
|
"calibration/batch_entropy_100bins": 0.9679847491523969,
|
|
"calibration/batch_entropy_10bins": 0.9810928602094272,
|
|
"calibration/batch_entropy_50bins": 0.9765758073335764,
|
|
"calibration/batch_uniqueness": 0.9527984619140625,
|
|
"calibration/buffer_distribution_entropy": 0.9977757450664437,
|
|
"calibration/buffer_entropy_100bins": 0.9878115860267597,
|
|
"calibration/buffer_entropy_10bins": 0.9977757450664437,
|
|
"calibration/buffer_entropy_50bins": 0.992834295234476,
|
|
"calibration/confidence_entropy": 0.4810720543670087,
|
|
"calibration/coverage@0%": 0.00625,
|
|
"calibration/coverage@1%": 0.00625,
|
|
"calibration/coverage@10%": 0.044921875,
|
|
"calibration/coverage@15%": 0.07109375,
|
|
"calibration/coverage@20%": 0.137109375,
|
|
"calibration/coverage@25%": 0.33671875,
|
|
"calibration/coverage@30%": 0.45859375,
|
|
"calibration/coverage@5%": 0.0140625,
|
|
"calibration/ece": 0.141736657577259,
|
|
"calibration/mean_confidence": 0.5299977859351189,
|
|
"calibration/prompt_uniqueness": 0.8634765625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 894.6,
|
|
"completions/max_terminated_length": 514.8,
|
|
"completions/mean_length": 190.746875,
|
|
"completions/mean_terminated_length": 190.48418273925782,
|
|
"completions/min_length": 90.2,
|
|
"completions/min_terminated_length": 90.2,
|
|
"epoch": 0.368,
|
|
"grad_norm": 0.0009830680210143328,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 384988532.0,
|
|
"reward": 0.8328436970710754,
|
|
"reward_std": 0.10252733081579209,
|
|
"rewards/accuracy_reward": 0.49130859375,
|
|
"rewards/brier_reward": 0.774866783618927,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0033453899435698987,
|
|
"rewards/frontier_coverage_1": 0.1207592561841011,
|
|
"rewards/frontier_coverage_10": 0.1207592561841011,
|
|
"rewards/frontier_coverage_15": 0.1207592561841011,
|
|
"rewards/frontier_coverage_20": 0.1207592561841011,
|
|
"rewards/frontier_coverage_25": 0.1207592561841011,
|
|
"rewards/frontier_coverage_5": 0.1207592561841011,
|
|
"rewards/frontier_ece_reward": 0.015102808736264706,
|
|
"rewards/frontier_entropy_batch_reward": -0.19416911602020265,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.107684326171875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.180078125,
|
|
"signal/accuracy_reward/group_std_mean": 0.14609776586294174,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.559375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0538421630859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0538421630859375,
|
|
"signal/advantage_abs_mean": 0.07987660020589829,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07987660020589829,
|
|
"signal/advantage_pre_scale_std": 0.11792214959859848,
|
|
"signal/advantage_std": 0.11792214959859848,
|
|
"signal/brier_reward/centered_abs_mean": 0.14661412835121154,
|
|
"signal/brier_reward/group_bin_occupancy": 0.866796875,
|
|
"signal/brier_reward/group_std_mean": 0.18781245350837708,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018326766043901443,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018326766043901443,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_bin_occupancy": 0.1265625,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028235503938049077,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.773046875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004136141994968057,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.054155117250048e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.054155117250048e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1856350988149643,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.880859375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23784518837928773,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00332286823540926,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00332286823540926,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1856350988149643,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.880859375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23784518837928773,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00332286823540926,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00332286823540926,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1856350988149643,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.880859375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23784518837928773,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00332286823540926,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00332286823540926,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1856350988149643,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.880859375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23784518837928773,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00332286823540926,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00332286823540926,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1856350988149643,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.880859375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23784518837928773,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00332286823540926,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00332286823540926,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1856350988149643,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.880859375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23784518837928773,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00332286823540926,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00332286823540926,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.029052532091736794,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.865234375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03975553885102272,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036315665114670993,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036315665114670993,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2735773980617523,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734765625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35312792658805847,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03419717475771904,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03419717475771904,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3436694397661129,
|
|
"calibration/batch_distribution_entropy": 0.9729021229483618,
|
|
"calibration/batch_entropy_100bins": 0.9666308319358716,
|
|
"calibration/batch_entropy_10bins": 0.9729021229483618,
|
|
"calibration/batch_entropy_50bins": 0.9741625269875778,
|
|
"calibration/batch_uniqueness": 0.9517198658975239,
|
|
"calibration/buffer_distribution_entropy": 0.9991215376397509,
|
|
"calibration/buffer_entropy_100bins": 0.9948472735016413,
|
|
"calibration/buffer_entropy_10bins": 0.9991215376397509,
|
|
"calibration/buffer_entropy_50bins": 0.9970063375341864,
|
|
"calibration/confidence_entropy": 0.459402213935696,
|
|
"calibration/coverage@0%": 0.019553418542074364,
|
|
"calibration/coverage@1%": 0.019553418542074364,
|
|
"calibration/coverage@10%": 0.1654216609589041,
|
|
"calibration/coverage@15%": 0.2588284307729941,
|
|
"calibration/coverage@20%": 0.295582344667319,
|
|
"calibration/coverage@25%": 0.34835952788649704,
|
|
"calibration/coverage@30%": 0.4058150379158512,
|
|
"calibration/coverage@5%": 0.09621147260273973,
|
|
"calibration/ece": 0.14823283473979842,
|
|
"calibration/mean_confidence": 0.46903977021675436,
|
|
"calibration/prompt_uniqueness": 0.8525091355846774,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1344.8,
|
|
"completions/max_terminated_length": 674.6,
|
|
"completions/mean_length": 192.716796875,
|
|
"completions/mean_terminated_length": 192.05948486328126,
|
|
"completions/min_length": 96.8,
|
|
"completions/min_terminated_length": 96.8,
|
|
"epoch": 0.384,
|
|
"grad_norm": 0.0008304574876092374,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 401818464.0,
|
|
"reward": 0.8498589992523193,
|
|
"reward_std": 0.10006450712680817,
|
|
"rewards/accuracy_reward": 0.526953125,
|
|
"rewards/brier_reward": 0.7873589873313904,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.003031095629557967,
|
|
"rewards/frontier_coverage_1": 0.11328938379883766,
|
|
"rewards/frontier_coverage_10": 0.11328938379883766,
|
|
"rewards/frontier_coverage_15": 0.11328938379883766,
|
|
"rewards/frontier_coverage_20": 0.11328938379883766,
|
|
"rewards/frontier_coverage_25": 0.11328938379883766,
|
|
"rewards/frontier_coverage_5": 0.11328938379883766,
|
|
"rewards/frontier_ece_reward": 0.017132452875375747,
|
|
"rewards/frontier_entropy_batch_reward": -0.2076016277074814,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10501708984375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.177734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.14193961024284363,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.578125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052508544921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.052508544921875,
|
|
"signal/advantage_abs_mean": 0.07748562693595887,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07748562693595887,
|
|
"signal/advantage_pre_scale_std": 0.11569896936416627,
|
|
"signal/advantage_std": 0.11569896936416627,
|
|
"signal/brier_reward/centered_abs_mean": 0.13543253839015962,
|
|
"signal/brier_reward/group_bin_occupancy": 0.837890625,
|
|
"signal/brier_reward/group_std_mean": 0.17658950984477997,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016929067298769952,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016929067298769952,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_bin_occupancy": 0.127734375,
|
|
"signal/format_reward/group_std_mean": 0.003866990143433213,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030818260740488766,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.763671875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004590557329356671,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.5164685181807724e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.5164685181807724e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17714880108833314,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8609375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22984228730201722,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031709634698927403,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031709634698927403,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17714880108833314,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8609375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22984228730201722,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031709634698927403,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031709634698927403,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17714880108833314,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8609375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22984228730201722,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031709634698927403,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031709634698927403,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17714880108833314,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8609375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22984228730201722,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031709634698927403,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031709634698927403,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17714880108833314,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8609375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22984228730201722,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031709634698927403,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031709634698927403,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17714880108833314,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8609375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22984228730201722,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031709634698927403,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031709634698927403,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.026306905224919318,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.859375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03540766686201095,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032883631531149147,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032883631531149147,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27145218253135683,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72578125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34705948233604433,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033931522816419604,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033931522816419604,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4289352146594846,
|
|
"calibration/batch_distribution_entropy": 0.9840787382808802,
|
|
"calibration/batch_entropy_100bins": 0.9694645908187793,
|
|
"calibration/batch_entropy_10bins": 0.9840787382808802,
|
|
"calibration/batch_entropy_50bins": 0.9801606203621676,
|
|
"calibration/batch_uniqueness": 0.9542595829124529,
|
|
"calibration/buffer_distribution_entropy": 0.9990103579945624,
|
|
"calibration/buffer_entropy_100bins": 0.9979487473823087,
|
|
"calibration/buffer_entropy_10bins": 0.9990103579945624,
|
|
"calibration/buffer_entropy_50bins": 0.9985370679148613,
|
|
"calibration/confidence_entropy": 0.4909405142009618,
|
|
"calibration/coverage@0%": 0.003907014432485323,
|
|
"calibration/coverage@1%": 0.003907014432485323,
|
|
"calibration/coverage@10%": 0.003907014432485323,
|
|
"calibration/coverage@15%": 0.007422639432485323,
|
|
"calibration/coverage@20%": 0.016407014432485323,
|
|
"calibration/coverage@25%": 0.10820388943248531,
|
|
"calibration/coverage@30%": 0.1953132644324853,
|
|
"calibration/coverage@5%": 0.003907014432485323,
|
|
"calibration/ece": 0.17591819275681503,
|
|
"calibration/mean_confidence": 0.5040096555151118,
|
|
"calibration/prompt_uniqueness": 0.8661961518763007,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1150.2,
|
|
"completions/max_terminated_length": 636.6,
|
|
"completions/mean_length": 193.98671875,
|
|
"completions/mean_terminated_length": 193.59335632324218,
|
|
"completions/min_length": 97.6,
|
|
"completions/min_terminated_length": 97.6,
|
|
"epoch": 0.4,
|
|
"grad_norm": 0.00110912777017802,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 418841336.0,
|
|
"reward": 0.8370228409767151,
|
|
"reward_std": 0.10723992139101028,
|
|
"rewards/accuracy_reward": 0.50869140625,
|
|
"rewards/brier_reward": 0.7669232487678528,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.003695770213380456,
|
|
"rewards/frontier_coverage_1": 0.1053567928262055,
|
|
"rewards/frontier_coverage_10": 0.1053567928262055,
|
|
"rewards/frontier_coverage_15": 0.1053567928262055,
|
|
"rewards/frontier_coverage_20": 0.1053567928262055,
|
|
"rewards/frontier_coverage_25": 0.1053567928262055,
|
|
"rewards/frontier_coverage_5": 0.1053567928262055,
|
|
"rewards/frontier_ece_reward": 0.012649891711771489,
|
|
"rewards/frontier_entropy_batch_reward": -0.20658698678016663,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.121722412109375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.180859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.15902018547058105,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.553125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0608612060546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0608612060546875,
|
|
"signal/advantage_abs_mean": 0.08474465608596801,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08474465608596801,
|
|
"signal/advantage_pre_scale_std": 0.12552352696657182,
|
|
"signal/advantage_std": 0.12552352696657182,
|
|
"signal/brier_reward/centered_abs_mean": 0.14580391943454743,
|
|
"signal/brier_reward/group_bin_occupancy": 0.859765625,
|
|
"signal/brier_reward/group_std_mean": 0.18698894679546357,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01822548992931843,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01822548992931843,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_bin_occupancy": 0.1265625,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003800245560705662,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72890625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005965401232242584,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.80243938404601e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.80243938404601e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1798011213541031,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.862890625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23461248278617858,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032184398733079433,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032184398733079433,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1798011213541031,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.862890625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23461248278617858,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032184398733079433,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032184398733079433,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1798011213541031,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.862890625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23461248278617858,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032184398733079433,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032184398733079433,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1798011213541031,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.862890625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23461248278617858,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032184398733079433,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032184398733079433,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1798011213541031,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.862890625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23461248278617858,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032184398733079433,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032184398733079433,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1798011213541031,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.862890625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23461248278617858,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032184398733079433,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032184398733079433,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.022467482089996337,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.891015625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02992837503552437,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002808435261249542,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002808435261249542,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2770455002784729,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73203125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35210344195365906,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034630687534809114,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034630687534809114,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31638217177441763,
|
|
"calibration/batch_distribution_entropy": 0.9742986092431437,
|
|
"calibration/batch_entropy_100bins": 0.9684802374319121,
|
|
"calibration/batch_entropy_10bins": 0.9742986092431437,
|
|
"calibration/batch_entropy_50bins": 0.9745079873131764,
|
|
"calibration/batch_uniqueness": 0.9527449993937459,
|
|
"calibration/buffer_distribution_entropy": 0.99893326457422,
|
|
"calibration/buffer_entropy_100bins": 0.9988161195947732,
|
|
"calibration/buffer_entropy_10bins": 0.99893326457422,
|
|
"calibration/buffer_entropy_50bins": 0.9989688448398335,
|
|
"calibration/confidence_entropy": 0.5050498236628885,
|
|
"calibration/coverage@0%": 0.01171875,
|
|
"calibration/coverage@1%": 0.01171875,
|
|
"calibration/coverage@10%": 0.045703125,
|
|
"calibration/coverage@15%": 0.071484375,
|
|
"calibration/coverage@20%": 0.15277641878669276,
|
|
"calibration/coverage@25%": 0.27740566903131114,
|
|
"calibration/coverage@30%": 0.4689112952544031,
|
|
"calibration/coverage@5%": 0.014453125,
|
|
"calibration/ece": 0.09948484508211022,
|
|
"calibration/mean_confidence": 0.5181539975937481,
|
|
"calibration/prompt_uniqueness": 0.8683413883649844,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1142.0,
|
|
"completions/max_terminated_length": 758.0,
|
|
"completions/mean_length": 196.4564453125,
|
|
"completions/mean_terminated_length": 195.80313110351562,
|
|
"completions/min_length": 101.2,
|
|
"completions/min_terminated_length": 101.2,
|
|
"epoch": 0.416,
|
|
"grad_norm": 0.0009177210740745068,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 435734234.0,
|
|
"reward": 0.845232892036438,
|
|
"reward_std": 0.10218746364116668,
|
|
"rewards/accuracy_reward": 0.5146484375,
|
|
"rewards/brier_reward": 0.7825942277908325,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0030067469459027054,
|
|
"rewards/frontier_coverage_1": 0.10945327430963517,
|
|
"rewards/frontier_coverage_10": 0.10945327430963517,
|
|
"rewards/frontier_coverage_15": 0.10945327430963517,
|
|
"rewards/frontier_coverage_20": 0.10945327430963517,
|
|
"rewards/frontier_coverage_25": 0.10938042849302292,
|
|
"rewards/frontier_coverage_5": 0.10945327430963517,
|
|
"rewards/frontier_ece_reward": 0.012254784442484378,
|
|
"rewards/frontier_entropy_batch_reward": -0.18322778046131133,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.111279296875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.177734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.14699049890041352,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.578125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0556396484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0556396484375,
|
|
"signal/advantage_abs_mean": 0.07910501658916473,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07910501658916473,
|
|
"signal/advantage_pre_scale_std": 0.11764014065265656,
|
|
"signal/advantage_std": 0.11764014065265656,
|
|
"signal/brier_reward/centered_abs_mean": 0.14102001786231994,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85859375,
|
|
"signal/brier_reward/group_std_mean": 0.18009372055530548,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017627502232789992,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017627502232789992,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_bin_occupancy": 0.126953125,
|
|
"signal/format_reward/group_std_mean": 0.002762135770171881,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032022904139012097,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73828125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005255010444670916,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.7320995983900504e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.7320995983900504e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18824252784252166,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23771241903305054,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003369541047140956,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003369541047140956,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18824252784252166,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23771241903305054,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003369541047140956,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003369541047140956,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18824252784252166,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23771241903305054,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003369541047140956,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003369541047140956,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18824252784252166,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23771241903305054,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003369541047140956,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003369541047140956,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18796592950820923,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.886328125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23737676739692687,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003364589996635914,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003364589996635914,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18824252784252166,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23771241903305054,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003369541047140956,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003369541047140956,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01897584684193134,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.905078125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0248013224452734,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023719808552414177,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023719808552414177,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.258677664399147,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73828125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3334538578987122,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03233470804989338,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03233470804989338,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27110627824789024,
|
|
"calibration/batch_distribution_entropy": 0.9834757676207486,
|
|
"calibration/batch_entropy_100bins": 0.9703422906787201,
|
|
"calibration/batch_entropy_10bins": 0.9834757676207486,
|
|
"calibration/batch_entropy_50bins": 0.9793241316918639,
|
|
"calibration/batch_uniqueness": 0.954693603515625,
|
|
"calibration/buffer_distribution_entropy": 0.9993392738307401,
|
|
"calibration/buffer_entropy_100bins": 0.9991317727363503,
|
|
"calibration/buffer_entropy_10bins": 0.9993392738307401,
|
|
"calibration/buffer_entropy_50bins": 0.9992819152374992,
|
|
"calibration/confidence_entropy": 0.47524143822844167,
|
|
"calibration/coverage@0%": 0.016796875,
|
|
"calibration/coverage@1%": 0.016796875,
|
|
"calibration/coverage@10%": 0.074609375,
|
|
"calibration/coverage@15%": 0.255078125,
|
|
"calibration/coverage@20%": 0.341796875,
|
|
"calibration/coverage@25%": 0.423828125,
|
|
"calibration/coverage@30%": 0.580078125,
|
|
"calibration/coverage@5%": 0.021484375,
|
|
"calibration/ece": 0.11594602895950101,
|
|
"calibration/mean_confidence": 0.5297345627242726,
|
|
"calibration/prompt_uniqueness": 0.85546875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 903.6,
|
|
"completions/max_terminated_length": 518.6,
|
|
"completions/mean_length": 197.63076171875,
|
|
"completions/mean_terminated_length": 197.3692840576172,
|
|
"completions/min_length": 99.8,
|
|
"completions/min_terminated_length": 99.8,
|
|
"epoch": 0.432,
|
|
"grad_norm": 0.0010740357683971524,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 452772309.0,
|
|
"reward": 0.8608399510383606,
|
|
"reward_std": 0.09858821481466293,
|
|
"rewards/accuracy_reward": 0.54716796875,
|
|
"rewards/brier_reward": 0.7949665904045105,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002391556603834033,
|
|
"rewards/frontier_coverage_1": 0.10390491709113121,
|
|
"rewards/frontier_coverage_10": 0.10390475168824195,
|
|
"rewards/frontier_coverage_15": 0.10390453487634659,
|
|
"rewards/frontier_coverage_20": 0.10388994812965394,
|
|
"rewards/frontier_coverage_25": 0.10309707075357437,
|
|
"rewards/frontier_coverage_5": 0.10390491709113121,
|
|
"rewards/frontier_ece_reward": 0.012549491226673126,
|
|
"rewards/frontier_entropy_batch_reward": -0.1975017488002777,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.111602783203125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.14663170725107194,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0558013916015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0558013916015625,
|
|
"signal/advantage_abs_mean": 0.07775494158267975,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07775494158267975,
|
|
"signal/advantage_pre_scale_std": 0.11521324068307877,
|
|
"signal/advantage_std": 0.11521324068307877,
|
|
"signal/brier_reward/centered_abs_mean": 0.1291389599442482,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85390625,
|
|
"signal/brier_reward/group_std_mean": 0.16580995321273803,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016142369993031026,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016142369993031026,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027791480533778667,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73203125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004389007203280925,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.974674666300416e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.974674666300416e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1781601697206497,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.873828125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22925682067871095,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003189067030325532,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003189067030325532,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1781599998474121,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.873828125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22925659418106079,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003189063956961036,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003189063956961036,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1781597375869751,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.873828125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22925626039505004,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031890592537820337,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031890592537820337,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17814434170722962,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.873828125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22923634946346283,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031887838151305912,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031887838151305912,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1761375993490219,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.871875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2266537368297577,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031528628896921873,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031528628896921873,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1781601697206497,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.873828125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22925682067871095,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003189067030325532,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003189067030325532,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.016893037036061286,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.90234375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02187432684004307,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021116296295076607,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021116296295076607,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2685338854789734,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7390625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3456457793712616,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033566735684871674,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033566735684871674,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28285148846655567,
|
|
"calibration/batch_distribution_entropy": 0.9756966652094482,
|
|
"calibration/batch_entropy_100bins": 0.9671782953324402,
|
|
"calibration/batch_entropy_10bins": 0.9756966652094482,
|
|
"calibration/batch_entropy_50bins": 0.974769557863015,
|
|
"calibration/batch_uniqueness": 0.9531880721897764,
|
|
"calibration/buffer_distribution_entropy": 0.9993609676432893,
|
|
"calibration/buffer_entropy_100bins": 0.9991891999147186,
|
|
"calibration/buffer_entropy_10bins": 0.9993609676432893,
|
|
"calibration/buffer_entropy_50bins": 0.999319530042054,
|
|
"calibration/confidence_entropy": 0.5029605620968143,
|
|
"calibration/coverage@0%": 0.025391389432485322,
|
|
"calibration/coverage@1%": 0.025391389432485322,
|
|
"calibration/coverage@10%": 0.08907167318982387,
|
|
"calibration/coverage@15%": 0.15821459148727984,
|
|
"calibration/coverage@20%": 0.23907473091976517,
|
|
"calibration/coverage@25%": 0.3297272504892368,
|
|
"calibration/coverage@30%": 0.5098833476027397,
|
|
"calibration/coverage@5%": 0.04375076443248532,
|
|
"calibration/ece": 0.11815471566014837,
|
|
"calibration/mean_confidence": 0.5428597595536488,
|
|
"calibration/prompt_uniqueness": 0.867127087262617,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1115.8,
|
|
"completions/max_terminated_length": 790.0,
|
|
"completions/mean_length": 203.92919921875,
|
|
"completions/mean_terminated_length": 203.5388946533203,
|
|
"completions/min_length": 105.0,
|
|
"completions/min_terminated_length": 105.0,
|
|
"epoch": 0.448,
|
|
"grad_norm": 0.0009459942230023444,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 469813344.0,
|
|
"reward": 0.8468173861503601,
|
|
"reward_std": 0.09660987108945847,
|
|
"rewards/accuracy_reward": 0.51064453125,
|
|
"rewards/brier_reward": 0.7961806297302246,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.002907540462911129,
|
|
"rewards/frontier_coverage_1": 0.12633958905935289,
|
|
"rewards/frontier_coverage_10": 0.12633958905935289,
|
|
"rewards/frontier_coverage_15": 0.12633958905935289,
|
|
"rewards/frontier_coverage_20": 0.1263102501630783,
|
|
"rewards/frontier_coverage_25": 0.1250537723302841,
|
|
"rewards/frontier_coverage_5": 0.12633958905935289,
|
|
"rewards/frontier_ece_reward": 0.010841607302427291,
|
|
"rewards/frontier_entropy_batch_reward": -0.1818355828523636,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.104315185546875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.176171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1402893543243408,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0521575927734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0521575927734375,
|
|
"signal/advantage_abs_mean": 0.07476266324520112,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07476266324520112,
|
|
"signal/advantage_pre_scale_std": 0.11200970113277435,
|
|
"signal/advantage_std": 0.11200970113277435,
|
|
"signal/brier_reward/centered_abs_mean": 0.12935363054275512,
|
|
"signal/brier_reward/group_bin_occupancy": 0.855078125,
|
|
"signal/brier_reward/group_std_mean": 0.16653329730033875,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01616920381784439,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01616920381784439,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002982544107362628,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.708984375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004976610559970141,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3387537627713753e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3387537627713753e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17772595584392548,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8765625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22692298889160156,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031812945380806923,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031812945380806923,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17772595584392548,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8765625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22692298889160156,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031812945380806923,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031812945380806923,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17772595584392548,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8765625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22692298889160156,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031812945380806923,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031812945380806923,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1776350975036621,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.876171875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22681189179420472,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031796682626008986,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031796682626008986,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17426885068416595,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8765625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22260749340057373,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003119412390515208,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003119412390515208,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17772595584392548,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8765625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22692298889160156,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031812945380806923,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031812945380806923,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.015317396074533463,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.89140625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0201519463211298,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001914674509316683,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001914674509316683,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2546141266822815,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.742578125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32758485078811644,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03182676583528519,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03182676583528519,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3984116060553997,
|
|
"calibration/batch_distribution_entropy": 0.9822031101518748,
|
|
"calibration/batch_entropy_100bins": 0.9714458118149061,
|
|
"calibration/batch_entropy_10bins": 0.9822031101518748,
|
|
"calibration/batch_entropy_50bins": 0.9794551763094261,
|
|
"calibration/batch_uniqueness": 0.9540252685546875,
|
|
"calibration/buffer_distribution_entropy": 0.9991268812264866,
|
|
"calibration/buffer_entropy_100bins": 0.9990936316751196,
|
|
"calibration/buffer_entropy_10bins": 0.9991268812264866,
|
|
"calibration/buffer_entropy_50bins": 0.9991993587888806,
|
|
"calibration/confidence_entropy": 0.5101373667560415,
|
|
"calibration/coverage@0%": 0.01171875,
|
|
"calibration/coverage@1%": 0.01171875,
|
|
"calibration/coverage@10%": 0.015625,
|
|
"calibration/coverage@15%": 0.019140625,
|
|
"calibration/coverage@20%": 0.1140625,
|
|
"calibration/coverage@25%": 0.18828125,
|
|
"calibration/coverage@30%": 0.283203125,
|
|
"calibration/coverage@5%": 0.01171875,
|
|
"calibration/ece": 0.12294057415090998,
|
|
"calibration/mean_confidence": 0.4963258028783944,
|
|
"calibration/prompt_uniqueness": 0.85458984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 1060.8,
|
|
"completions/max_terminated_length": 660.8,
|
|
"completions/mean_length": 204.21494140625,
|
|
"completions/mean_terminated_length": 203.95458374023437,
|
|
"completions/min_length": 107.8,
|
|
"completions/min_terminated_length": 107.8,
|
|
"epoch": 0.464,
|
|
"grad_norm": 0.0008858161745592952,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 487075321.0,
|
|
"reward": 0.8189481854438782,
|
|
"reward_std": 0.09576210975646973,
|
|
"rewards/accuracy_reward": 0.4634765625,
|
|
"rewards/brier_reward": 0.7746347069740296,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.003687644097954035,
|
|
"rewards/frontier_coverage_1": 0.14042913317680358,
|
|
"rewards/frontier_coverage_10": 0.14042913317680358,
|
|
"rewards/frontier_coverage_15": 0.14042913317680358,
|
|
"rewards/frontier_coverage_20": 0.1404130145907402,
|
|
"rewards/frontier_coverage_25": 0.13810611069202422,
|
|
"rewards/frontier_coverage_5": 0.14042913317680358,
|
|
"rewards/frontier_ece_reward": 0.008142163883894682,
|
|
"rewards/frontier_entropy_batch_reward": -0.20411013662815095,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09339599609375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.176171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.13170869201421737,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.046697998046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.046697998046875,
|
|
"signal/advantage_abs_mean": 0.07327512502670289,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07327512502670289,
|
|
"signal/advantage_pre_scale_std": 0.11063500344753266,
|
|
"signal/advantage_std": 0.11063500344753266,
|
|
"signal/brier_reward/centered_abs_mean": 0.1346296638250351,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84765625,
|
|
"signal/brier_reward/group_std_mean": 0.17363184988498687,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01682870797812939,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01682870797812939,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032396471593528985,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71796875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005168183147907257,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.798968268209137e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.798968268209137e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17095789611339568,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22138096988201142,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00306014628149569,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00306014628149569,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17095789611339568,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22138096988201142,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00306014628149569,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00306014628149569,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17095789611339568,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22138096988201142,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00306014628149569,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00306014628149569,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17093894481658936,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2213562995195389,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030598069541156294,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030598069541156294,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16712769567966462,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.878125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2164506733417511,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029915857128798963,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029915857128798963,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17095789611339568,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22138096988201142,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00306014628149569,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00306014628149569,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0138343783095479,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.886328125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01835048608481884,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017292972886934876,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017292972886934876,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27222808003425597,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741015625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34940491914749144,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034028510004281996,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034028510004281996,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2832759600511364,
|
|
"calibration/batch_distribution_entropy": 0.9715486304314798,
|
|
"calibration/batch_entropy_100bins": 0.9645395963303353,
|
|
"calibration/batch_entropy_10bins": 0.9715486304314798,
|
|
"calibration/batch_entropy_50bins": 0.9721231779983697,
|
|
"calibration/batch_uniqueness": 0.9513397216796875,
|
|
"calibration/buffer_distribution_entropy": 0.9989767926105942,
|
|
"calibration/buffer_entropy_100bins": 0.9990000341778547,
|
|
"calibration/buffer_entropy_10bins": 0.9989767926105942,
|
|
"calibration/buffer_entropy_50bins": 0.9990904829689274,
|
|
"calibration/confidence_entropy": 0.480528899956653,
|
|
"calibration/coverage@0%": 0.009765625,
|
|
"calibration/coverage@1%": 0.009765625,
|
|
"calibration/coverage@10%": 0.134375,
|
|
"calibration/coverage@15%": 0.225,
|
|
"calibration/coverage@20%": 0.310546875,
|
|
"calibration/coverage@25%": 0.396484375,
|
|
"calibration/coverage@30%": 0.5953125,
|
|
"calibration/coverage@5%": 0.026953125,
|
|
"calibration/ece": 0.14768165181713427,
|
|
"calibration/mean_confidence": 0.49541220339789815,
|
|
"calibration/prompt_uniqueness": 0.85048828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 964.8,
|
|
"completions/max_terminated_length": 635.2,
|
|
"completions/mean_length": 199.683984375,
|
|
"completions/mean_terminated_length": 199.29322509765626,
|
|
"completions/min_length": 102.6,
|
|
"completions/min_terminated_length": 102.6,
|
|
"epoch": 0.48,
|
|
"grad_norm": 0.00261081475764513,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 504168117.0,
|
|
"reward": 0.8438061833381653,
|
|
"reward_std": 0.09672794342041016,
|
|
"rewards/accuracy_reward": 0.51884765625,
|
|
"rewards/brier_reward": 0.7764919996261597,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.003090168023481965,
|
|
"rewards/frontier_coverage_1": 0.11306091845035553,
|
|
"rewards/frontier_coverage_10": 0.11306091845035553,
|
|
"rewards/frontier_coverage_15": 0.11306091845035553,
|
|
"rewards/frontier_coverage_20": 0.11289113312959671,
|
|
"rewards/frontier_coverage_25": 0.11131031811237335,
|
|
"rewards/frontier_coverage_5": 0.11306091845035553,
|
|
"rewards/frontier_ece_reward": 0.008230427093803883,
|
|
"rewards/frontier_entropy_batch_reward": -0.20491617918014526,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.117669677734375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.15455419719219207,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0588348388671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0588348388671875,
|
|
"signal/advantage_abs_mean": 0.07520890831947327,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07520890831947327,
|
|
"signal/advantage_pre_scale_std": 0.11293750852346421,
|
|
"signal/advantage_std": 0.11293750852346421,
|
|
"signal/brier_reward/centered_abs_mean": 0.1358100563287735,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8375,
|
|
"signal/brier_reward/group_std_mean": 0.1739418923854828,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016976257041096687,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016976257041096687,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029037161730229855,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7359375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0047456233762204645,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1976518443552776e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1976518443552776e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19191361963748932,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.855859375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24454809129238128,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034352536778897045,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034352536778897045,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19191361963748932,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.855859375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24454809129238128,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034352536778897045,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034352536778897045,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19191361963748932,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.855859375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24454809129238128,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034352536778897045,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034352536778897045,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19139576852321624,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.85625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24390313625335694,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003425984038040042,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003425984038040042,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18600209653377534,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.857421875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23725315928459167,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033294373657554387,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033294373657554387,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19191361963748932,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.855859375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24454809129238128,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034352536778897045,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034352536778897045,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.013514818623661995,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.901171875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.017669208720326422,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016893523279577494,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016893523279577494,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26671458780765533,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.743359375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34039146900177003,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03333932347595692,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03333932347595692,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"eval_calibration/aurc": 0.5405549587051462,
|
|
"eval_calibration/batch_distribution_entropy": 0.9065406036093964,
|
|
"eval_calibration/batch_entropy_100bins": 0.6972847745692209,
|
|
"eval_calibration/batch_entropy_10bins": 0.9065406036093964,
|
|
"eval_calibration/batch_entropy_50bins": 0.7754914476517584,
|
|
"eval_calibration/batch_uniqueness": 0.89453125,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9988051667639347,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9988938324367392,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9988051667639347,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9989811313318033,
|
|
"eval_calibration/confidence_entropy": 0.48391809012155884,
|
|
"eval_calibration/coverage@0%": 0.0390625,
|
|
"eval_calibration/coverage@1%": 0.0390625,
|
|
"eval_calibration/coverage@10%": 0.0390625,
|
|
"eval_calibration/coverage@15%": 0.0390625,
|
|
"eval_calibration/coverage@20%": 0.046875,
|
|
"eval_calibration/coverage@25%": 0.046875,
|
|
"eval_calibration/coverage@30%": 0.046875,
|
|
"eval_calibration/coverage@5%": 0.0390625,
|
|
"eval_calibration/ece": 0.2232385876510525,
|
|
"eval_calibration/mean_confidence": 0.4669556942027052,
|
|
"eval_calibration/prompt_uniqueness": 0.89453125,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 387.0,
|
|
"eval_completions/max_terminated_length": 387.0,
|
|
"eval_completions/mean_length": 198.34226989746094,
|
|
"eval_completions/mean_terminated_length": 198.34226989746094,
|
|
"eval_completions/min_length": 125.0,
|
|
"eval_completions/min_terminated_length": 125.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 504168117.0,
|
|
"eval_reward": 0.699365958571434,
|
|
"eval_reward_std": 0.21834751963615417,
|
|
"eval_rewards/accuracy_reward": 0.412109375,
|
|
"eval_rewards/brier_reward": 0.7784133553504944,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.004543848393950611,
|
|
"eval_rewards/frontier_coverage_1": 0.18927186354994774,
|
|
"eval_rewards/frontier_coverage_10": 0.18927159160375595,
|
|
"eval_rewards/frontier_coverage_15": 0.18926333636045456,
|
|
"eval_rewards/frontier_coverage_20": 0.18883728608489037,
|
|
"eval_rewards/frontier_coverage_25": 0.17392469197511673,
|
|
"eval_rewards/frontier_coverage_5": 0.18927186354994774,
|
|
"eval_rewards/frontier_ece_reward": 0.00836634065490216,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 19.6978,
|
|
"eval_samples_per_second": 25.384,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4649658203125,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4890812262892723,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23248291015625,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23248291015625,
|
|
"eval_signal/advantage_abs_mean": 0.19869232177734375,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.19869232177734375,
|
|
"eval_signal/advantage_pre_scale_std": 0.2162884622812271,
|
|
"eval_signal/advantage_std": 0.2162884622812271,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.20233283191919327,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.9140625,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2505420297384262,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02529160398989916,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02529160398989916,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005963263858575374,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.78125,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.010063497698865831,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010674241821106989,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010674241821106989,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.36536306887865067,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4483560249209404,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00653999880887568,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00653999880887568,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3653620555996895,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4483548328280449,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0065399802988395095,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0065399802988395095,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.365331307053566,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.448319248855114,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0065394300036132336,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0065394300036132336,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.36427226662635803,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.4470879137516022,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006520473049022257,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006520473049022257,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.33615638315677643,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.4133630245923996,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006017199018970132,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006017199018970132,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.36536306887865067,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4483560249209404,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00653999880887568,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00653999880887568,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.01719106500968337,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9296875,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.022434783168137074,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021488831262104213,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021488831262104213,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.203,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3869300543313942,
|
|
"calibration/batch_distribution_entropy": 0.9808689690281369,
|
|
"calibration/batch_entropy_100bins": 0.9674042155001947,
|
|
"calibration/batch_entropy_10bins": 0.9808689690281369,
|
|
"calibration/batch_entropy_50bins": 0.976446592128552,
|
|
"calibration/batch_uniqueness": 0.9534576416015625,
|
|
"calibration/buffer_distribution_entropy": 0.9986828299800502,
|
|
"calibration/buffer_entropy_100bins": 0.9988154597442325,
|
|
"calibration/buffer_entropy_10bins": 0.9986828299800502,
|
|
"calibration/buffer_entropy_50bins": 0.9988999650475596,
|
|
"calibration/confidence_entropy": 0.4790370333140571,
|
|
"calibration/coverage@0%": 0.023828125,
|
|
"calibration/coverage@1%": 0.023828125,
|
|
"calibration/coverage@10%": 0.09765625,
|
|
"calibration/coverage@15%": 0.1453125,
|
|
"calibration/coverage@20%": 0.20390625,
|
|
"calibration/coverage@25%": 0.250390625,
|
|
"calibration/coverage@30%": 0.290625,
|
|
"calibration/coverage@5%": 0.053515625,
|
|
"calibration/ece": 0.13844372663002122,
|
|
"calibration/mean_confidence": 0.5048848300838907,
|
|
"calibration/prompt_uniqueness": 0.851220703125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 460.8,
|
|
"completions/max_terminated_length": 460.8,
|
|
"completions/mean_length": 195.812890625,
|
|
"completions/mean_terminated_length": 195.812890625,
|
|
"completions/min_length": 98.6,
|
|
"completions/min_terminated_length": 98.6,
|
|
"epoch": 0.496,
|
|
"grad_norm": 0.0008509118924848735,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 521481081.0,
|
|
"reward": 0.8582545042037963,
|
|
"reward_std": 0.09380114525556564,
|
|
"rewards/accuracy_reward": 0.5439453125,
|
|
"rewards/brier_reward": 0.7857403755187988,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.003081470658071339,
|
|
"rewards/frontier_coverage_1": 0.10034325867891311,
|
|
"rewards/frontier_coverage_10": 0.10034310221672058,
|
|
"rewards/frontier_coverage_15": 0.10032327324151993,
|
|
"rewards/frontier_coverage_20": 0.10001767575740814,
|
|
"rewards/frontier_coverage_25": 0.09081147015094757,
|
|
"rewards/frontier_coverage_5": 0.10034325867891311,
|
|
"rewards/frontier_ece_reward": 0.007945819105952979,
|
|
"rewards/frontier_entropy_batch_reward": -0.18739983737468718,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0947265625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.13038647025823594,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04736328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04736328125,
|
|
"signal/advantage_abs_mean": 0.07237804681062698,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07237804681062698,
|
|
"signal/advantage_pre_scale_std": 0.10942972600460052,
|
|
"signal/advantage_std": 0.10942972600460052,
|
|
"signal/brier_reward/centered_abs_mean": 0.12652941197156906,
|
|
"signal/brier_reward/group_bin_occupancy": 0.849609375,
|
|
"signal/brier_reward/group_std_mean": 0.1622232437133789,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015816176496446132,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015816176496446132,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003096145251765847,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.718359375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005016565602272749,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.542099897866137e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.542099897866137e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1638656437397003,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21250625550746918,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002933195047080517,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002933195047080517,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16386164724826813,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21250071823596955,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029331233818084,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029331233818084,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16379604637622833,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21241317689418793,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029319490771740676,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029319490771740676,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1630973845720291,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.867578125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2114973783493042,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002919443091377616,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002919443091377616,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1438317209482193,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.862109375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.18710974752902984,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00257458770647645,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00257458770647645,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1638656437397003,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21250625550746918,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002933195047080517,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002933195047080517,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.011986837163567543,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.898046875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.015694568678736687,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001498354645445943,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001498354645445943,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26680760979652407,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.738671875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34285420179367065,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03335095122456551,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03335095122456551,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31852894466439724,
|
|
"calibration/batch_distribution_entropy": 0.9846605362740333,
|
|
"calibration/batch_entropy_100bins": 0.9731097753973357,
|
|
"calibration/batch_entropy_10bins": 0.9846605362740333,
|
|
"calibration/batch_entropy_50bins": 0.9797415936230249,
|
|
"calibration/batch_uniqueness": 0.954290771484375,
|
|
"calibration/buffer_distribution_entropy": 0.9985237641268538,
|
|
"calibration/buffer_entropy_100bins": 0.9987331459546469,
|
|
"calibration/buffer_entropy_10bins": 0.9985237641268538,
|
|
"calibration/buffer_entropy_50bins": 0.9987960418931671,
|
|
"calibration/confidence_entropy": 0.5095141511134974,
|
|
"calibration/coverage@0%": 0.012890625,
|
|
"calibration/coverage@1%": 0.012890625,
|
|
"calibration/coverage@10%": 0.176953125,
|
|
"calibration/coverage@15%": 0.29765625,
|
|
"calibration/coverage@20%": 0.383203125,
|
|
"calibration/coverage@25%": 0.45546875,
|
|
"calibration/coverage@30%": 0.49453125,
|
|
"calibration/coverage@5%": 0.069140625,
|
|
"calibration/ece": 0.1362378664495895,
|
|
"calibration/mean_confidence": 0.5084870717219879,
|
|
"calibration/prompt_uniqueness": 0.86162109375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 896.2,
|
|
"completions/max_terminated_length": 459.0,
|
|
"completions/mean_length": 189.36025390625,
|
|
"completions/mean_terminated_length": 189.0965362548828,
|
|
"completions/min_length": 101.0,
|
|
"completions/min_terminated_length": 101.0,
|
|
"epoch": 0.512,
|
|
"grad_norm": 0.00109315593726933,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 538565794.0,
|
|
"reward": 0.8589414358139038,
|
|
"reward_std": 0.09629883468151093,
|
|
"rewards/accuracy_reward": 0.5396484375,
|
|
"rewards/brier_reward": 0.8017237544059753,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.002702287444844842,
|
|
"rewards/frontier_coverage_1": 0.11047709956765175,
|
|
"rewards/frontier_coverage_10": 0.11047184318304062,
|
|
"rewards/frontier_coverage_15": 0.11041708588600159,
|
|
"rewards/frontier_coverage_20": 0.10978015959262848,
|
|
"rewards/frontier_coverage_25": 0.09341206625103951,
|
|
"rewards/frontier_coverage_5": 0.11047709956765175,
|
|
"rewards/frontier_ece_reward": 0.008117536641657352,
|
|
"rewards/frontier_entropy_batch_reward": -0.18771363496780397,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1011474609375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.175,
|
|
"signal/accuracy_reward/group_std_mean": 0.13669176101684571,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05057373046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05057373046875,
|
|
"signal/advantage_abs_mean": 0.07451938837766647,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07451938837766647,
|
|
"signal/advantage_pre_scale_std": 0.11410035341978073,
|
|
"signal/advantage_std": 0.11410035341978073,
|
|
"signal/brier_reward/centered_abs_mean": 0.12140367329120635,
|
|
"signal/brier_reward/group_bin_occupancy": 0.844921875,
|
|
"signal/brier_reward/group_std_mean": 0.15884515047073364,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015175459161400794,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015175459161400794,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002908071083948016,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.707421875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00480275945737958,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.205447159823961e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.205447159823961e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15683222711086273,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20588673055171966,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028072968125343323,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028072968125343323,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15682255029678344,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20587407648563386,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028071236796677113,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028071236796677113,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15671851933002473,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20573811531066893,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002805261267349124,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002805261267349124,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15555653274059295,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20422441959381105,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027844619005918505,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027844619005918505,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1274886041879654,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.85703125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16840324103832244,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022820457816123962,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022820457816123962,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15683222711086273,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20588673055171966,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028072968125343323,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028072968125343323,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010539719834923744,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.899609375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.013909543678164483,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001317464979365468,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001317464979365468,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2608456969261169,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7421875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33714223504066465,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032605712115764615,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032605712115764615,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21917538463284222,
|
|
"calibration/batch_distribution_entropy": 0.9878817709654515,
|
|
"calibration/batch_entropy_100bins": 0.9757889859436031,
|
|
"calibration/batch_entropy_10bins": 0.9878817709654515,
|
|
"calibration/batch_entropy_50bins": 0.983224028911913,
|
|
"calibration/batch_uniqueness": 0.9545867919921875,
|
|
"calibration/buffer_distribution_entropy": 0.9985031862617003,
|
|
"calibration/buffer_entropy_100bins": 0.9987566386866493,
|
|
"calibration/buffer_entropy_10bins": 0.9985031862617003,
|
|
"calibration/buffer_entropy_50bins": 0.9987999633528707,
|
|
"calibration/confidence_entropy": 0.4950461976985136,
|
|
"calibration/coverage@0%": 0.0078125,
|
|
"calibration/coverage@1%": 0.0078125,
|
|
"calibration/coverage@10%": 0.25234375,
|
|
"calibration/coverage@15%": 0.36328125,
|
|
"calibration/coverage@20%": 0.51875,
|
|
"calibration/coverage@25%": 0.65703125,
|
|
"calibration/coverage@30%": 0.7484375,
|
|
"calibration/coverage@5%": 0.141796875,
|
|
"calibration/ece": 0.12159026779133557,
|
|
"calibration/mean_confidence": 0.5102481512964239,
|
|
"calibration/prompt_uniqueness": 0.847412109375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 686.2,
|
|
"completions/max_terminated_length": 686.2,
|
|
"completions/mean_length": 187.4068359375,
|
|
"completions/mean_terminated_length": 187.4068359375,
|
|
"completions/min_length": 93.4,
|
|
"completions/min_terminated_length": 93.4,
|
|
"epoch": 0.528,
|
|
"grad_norm": 0.0009646462858654559,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 555514376.0,
|
|
"reward": 0.8567448496818543,
|
|
"reward_std": 0.0943350225687027,
|
|
"rewards/accuracy_reward": 0.53310546875,
|
|
"rewards/brier_reward": 0.8055103659629822,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0025082074804231524,
|
|
"rewards/frontier_coverage_1": 0.1262974977493286,
|
|
"rewards/frontier_coverage_10": 0.12625774666666983,
|
|
"rewards/frontier_coverage_15": 0.1262197308242321,
|
|
"rewards/frontier_coverage_20": 0.12498710155487061,
|
|
"rewards/frontier_coverage_25": 0.09902235716581345,
|
|
"rewards/frontier_coverage_5": 0.1262974977493286,
|
|
"rewards/frontier_ece_reward": 0.006973131839185953,
|
|
"rewards/frontier_entropy_batch_reward": -0.19499198198318482,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.108697509765625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.175,
|
|
"signal/accuracy_reward/group_std_mean": 0.1418474718928337,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0543487548828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0543487548828125,
|
|
"signal/advantage_abs_mean": 0.07427967190742493,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07427967190742493,
|
|
"signal/advantage_pre_scale_std": 0.1111309289932251,
|
|
"signal/advantage_std": 0.1111309289932251,
|
|
"signal/brier_reward/centered_abs_mean": 0.11962604224681854,
|
|
"signal/brier_reward/group_bin_occupancy": 0.844921875,
|
|
"signal/brier_reward/group_std_mean": 0.1540861427783966,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014953255280852317,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014953255280852317,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002598578087054193,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.696484375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004507921310141683,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6514547284459697e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6514547284459697e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17175144851207733,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21923006176948548,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030743507202714683,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030743507202714683,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1716869741678238,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21915002167224884,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003073196718469262,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003073196718469262,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17162050902843476,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21906675398349762,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030720070470124485,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030720070470124485,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16946081519126893,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88359375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.21636516749858856,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030333484522998334,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030333484522998334,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12273151576519012,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.880859375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15797219574451446,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021968940272927284,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021968940272927284,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17175144851207733,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21923006176948548,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030743507202714683,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030743507202714683,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009134939312934876,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.883203125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012016034871339797,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011418674141168595,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011418674141168595,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2628565192222595,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.728515625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3387132942676544,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03285706490278244,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03285706490278244,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2330965079108403,
|
|
"calibration/batch_distribution_entropy": 0.9783253458334003,
|
|
"calibration/batch_entropy_100bins": 0.967895082463165,
|
|
"calibration/batch_entropy_10bins": 0.9783253458334003,
|
|
"calibration/batch_entropy_50bins": 0.9760524996239462,
|
|
"calibration/batch_uniqueness": 0.9533475407324336,
|
|
"calibration/buffer_distribution_entropy": 0.9984517660348228,
|
|
"calibration/buffer_entropy_100bins": 0.9987360351808527,
|
|
"calibration/buffer_entropy_10bins": 0.9984517660348228,
|
|
"calibration/buffer_entropy_50bins": 0.9987687423048592,
|
|
"calibration/confidence_entropy": 0.47645242697701995,
|
|
"calibration/coverage@0%": 0.038713154354207434,
|
|
"calibration/coverage@1%": 0.038713154354207434,
|
|
"calibration/coverage@10%": 0.13606210249510764,
|
|
"calibration/coverage@15%": 0.2814059442270059,
|
|
"calibration/coverage@20%": 0.48266802226027394,
|
|
"calibration/coverage@25%": 0.6222021771037183,
|
|
"calibration/coverage@30%": 0.7140311582681018,
|
|
"calibration/coverage@5%": 0.07820908757338552,
|
|
"calibration/ece": 0.09310715311335953,
|
|
"calibration/mean_confidence": 0.5309102969282397,
|
|
"calibration/prompt_uniqueness": 0.8458068276047086,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 845.2,
|
|
"completions/max_terminated_length": 624.6,
|
|
"completions/mean_length": 188.4822265625,
|
|
"completions/mean_terminated_length": 188.35069885253907,
|
|
"completions/min_length": 99.4,
|
|
"completions/min_terminated_length": 99.4,
|
|
"epoch": 0.544,
|
|
"grad_norm": 0.0012510113883763552,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 572608018.0,
|
|
"reward": 0.8668020844459534,
|
|
"reward_std": 0.0995595932006836,
|
|
"rewards/accuracy_reward": 0.571484375,
|
|
"rewards/brier_reward": 0.787188982963562,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.002780396491289139,
|
|
"rewards/frontier_coverage_1": 0.07695508673787117,
|
|
"rewards/frontier_coverage_10": 0.07694511339068413,
|
|
"rewards/frontier_coverage_15": 0.07687427774071694,
|
|
"rewards/frontier_coverage_20": 0.07620680481195449,
|
|
"rewards/frontier_coverage_25": 0.05764272883534431,
|
|
"rewards/frontier_coverage_5": 0.07695508673787117,
|
|
"rewards/frontier_ece_reward": 0.005536333145573735,
|
|
"rewards/frontier_entropy_batch_reward": -0.2059100717306137,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1152587890625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.178515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.15163064002990723,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.571875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05762939453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05762939453125,
|
|
"signal/advantage_abs_mean": 0.0772314801812172,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0772314801812172,
|
|
"signal/advantage_pre_scale_std": 0.11531815230846405,
|
|
"signal/advantage_std": 0.11531815230846405,
|
|
"signal/brier_reward/centered_abs_mean": 0.13076411485671996,
|
|
"signal/brier_reward/group_bin_occupancy": 0.857421875,
|
|
"signal/brier_reward/group_std_mean": 0.1675712913274765,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016345514357089995,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016345514357089995,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002782534621655941,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.716796875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004524756595492363,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.9807369941845535e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.9807369941845535e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17586564421653747,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22499242424964905,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031479948200285436,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031479948200285436,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1757751613855362,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2248790979385376,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031463753432035444,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031463753432035444,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17549155354499818,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22451838254928588,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031412987038493155,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031412987038493155,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1719941407442093,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.864453125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2201235145330429,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003078695107251406,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003078695107251406,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1134360283613205,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8578125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14679449796676636,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002030504820868373,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002030504820868373,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17586564421653747,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22499242424964905,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031479948200285436,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031479948200285436,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009148731268942356,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.894140625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011937451735138892,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011435914086177946,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011435914086177946,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2780795097351074,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.744140625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.351616758108139,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03475993871688843,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03475993871688843,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2589022891995325,
|
|
"calibration/batch_distribution_entropy": 0.9807805330573605,
|
|
"calibration/batch_entropy_100bins": 0.9685901941968815,
|
|
"calibration/batch_entropy_10bins": 0.9807805330573605,
|
|
"calibration/batch_entropy_50bins": 0.977349875364441,
|
|
"calibration/batch_uniqueness": 0.9529144287109375,
|
|
"calibration/buffer_distribution_entropy": 0.9982041576732013,
|
|
"calibration/buffer_entropy_100bins": 0.99861205511106,
|
|
"calibration/buffer_entropy_10bins": 0.9982041576732013,
|
|
"calibration/buffer_entropy_50bins": 0.9986009396983668,
|
|
"calibration/confidence_entropy": 0.487472109955439,
|
|
"calibration/coverage@0%": 0.082421875,
|
|
"calibration/coverage@1%": 0.09140625,
|
|
"calibration/coverage@10%": 0.228125,
|
|
"calibration/coverage@15%": 0.305859375,
|
|
"calibration/coverage@20%": 0.354296875,
|
|
"calibration/coverage@25%": 0.4875,
|
|
"calibration/coverage@30%": 0.617578125,
|
|
"calibration/coverage@5%": 0.18359375,
|
|
"calibration/ece": 0.11482706723425573,
|
|
"calibration/mean_confidence": 0.5064478468870471,
|
|
"calibration/prompt_uniqueness": 0.852490234375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 447.6,
|
|
"completions/max_terminated_length": 447.6,
|
|
"completions/mean_length": 188.24775390625,
|
|
"completions/mean_terminated_length": 188.24775390625,
|
|
"completions/min_length": 98.6,
|
|
"completions/min_terminated_length": 98.6,
|
|
"epoch": 0.56,
|
|
"grad_norm": 0.0009781933622434735,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 589357083.0,
|
|
"reward": 0.8498636603355407,
|
|
"reward_std": 0.09095648676156998,
|
|
"rewards/accuracy_reward": 0.526171875,
|
|
"rewards/brier_reward": 0.8025665879249573,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0028386770747601984,
|
|
"rewards/frontier_coverage_1": 0.11719954907894134,
|
|
"rewards/frontier_coverage_10": 0.11714765727519989,
|
|
"rewards/frontier_coverage_15": 0.11693512350320816,
|
|
"rewards/frontier_coverage_20": 0.11193432807922363,
|
|
"rewards/frontier_coverage_25": 0.07785176485776901,
|
|
"rewards/frontier_coverage_5": 0.11719954907894134,
|
|
"rewards/frontier_ece_reward": 0.005842031445354223,
|
|
"rewards/frontier_entropy_batch_reward": -0.2080444574356079,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.091064453125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12434282898902893,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0455322265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0455322265625,
|
|
"signal/advantage_abs_mean": 0.07068178355693817,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07068178355693817,
|
|
"signal/advantage_pre_scale_std": 0.10720473378896714,
|
|
"signal/advantage_std": 0.10720473378896714,
|
|
"signal/brier_reward/centered_abs_mean": 0.12168123424053193,
|
|
"signal/brier_reward/group_bin_occupancy": 0.83828125,
|
|
"signal/brier_reward/group_std_mean": 0.1573301523923874,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01521015428006649,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01521015428006649,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028686066623777153,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.716015625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004602818004786968,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.134805833222345e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.134805833222345e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1588267892599106,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.85234375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2065411925315857,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002842999389395118,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002842999389395118,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15874530375003815,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8515625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20643724501132965,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002841540891677141,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002841540891677141,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15840073227882384,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8515625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20599766075611115,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002835373068228364,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002835373068228364,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.14993982166051864,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.846484375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19521004855632781,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026839226484298706,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026839226484298706,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09466438889503478,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.85234375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12403950989246368,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001694492483511567,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001694492483511567,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1588267892599106,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.85234375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2065411925315857,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002842999389395118,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002842999389395118,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008115557208657264,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01066547017544508,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001014444651082158,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001014444651082158,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.274140340089798,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.735546875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34681135416030884,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03426754251122475,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03426754251122475,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28829373194083824,
|
|
"calibration/batch_distribution_entropy": 0.9841373980772424,
|
|
"calibration/batch_entropy_100bins": 0.9681536817665121,
|
|
"calibration/batch_entropy_10bins": 0.9841373980772424,
|
|
"calibration/batch_entropy_50bins": 0.9792620166416952,
|
|
"calibration/batch_uniqueness": 0.9536224365234375,
|
|
"calibration/buffer_distribution_entropy": 0.998297768682672,
|
|
"calibration/buffer_entropy_100bins": 0.9986629530424844,
|
|
"calibration/buffer_entropy_10bins": 0.998297768682672,
|
|
"calibration/buffer_entropy_50bins": 0.9986413654675284,
|
|
"calibration/confidence_entropy": 0.48650558811910427,
|
|
"calibration/coverage@0%": 0.019140625,
|
|
"calibration/coverage@1%": 0.019140625,
|
|
"calibration/coverage@10%": 0.109765625,
|
|
"calibration/coverage@15%": 0.1859375,
|
|
"calibration/coverage@20%": 0.351171875,
|
|
"calibration/coverage@25%": 0.4765625,
|
|
"calibration/coverage@30%": 0.573828125,
|
|
"calibration/coverage@5%": 0.079296875,
|
|
"calibration/ece": 0.10345090124198557,
|
|
"calibration/mean_confidence": 0.4916340363422075,
|
|
"calibration/prompt_uniqueness": 0.84775390625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 865.6,
|
|
"completions/max_terminated_length": 421.2,
|
|
"completions/mean_length": 185.69755859375,
|
|
"completions/mean_terminated_length": 185.43428955078124,
|
|
"completions/min_length": 93.6,
|
|
"completions/min_terminated_length": 93.6,
|
|
"epoch": 0.576,
|
|
"grad_norm": 0.0009465691982768476,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 606445250.0,
|
|
"reward": 0.8475932121276856,
|
|
"reward_std": 0.08987626880407333,
|
|
"rewards/accuracy_reward": 0.5220703125,
|
|
"rewards/brier_reward": 0.7915264964103699,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0030013061594218017,
|
|
"rewards/frontier_coverage_1": 0.12116425856947899,
|
|
"rewards/frontier_coverage_10": 0.12115439549088478,
|
|
"rewards/frontier_coverage_15": 0.12109048813581466,
|
|
"rewards/frontier_coverage_20": 0.11619948148727417,
|
|
"rewards/frontier_coverage_25": 0.07896296977996826,
|
|
"rewards/frontier_coverage_5": 0.12116425856947899,
|
|
"rewards/frontier_ece_reward": 0.005212780460715294,
|
|
"rewards/frontier_entropy_batch_reward": -0.20040208101272583,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09073486328125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.172265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.12525396645069123,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045367431640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045367431640625,
|
|
"signal/advantage_abs_mean": 0.06918673142790795,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06918673142790795,
|
|
"signal/advantage_pre_scale_std": 0.1060228943824768,
|
|
"signal/advantage_std": 0.1060228943824768,
|
|
"signal/brier_reward/centered_abs_mean": 0.12269987463951111,
|
|
"signal/brier_reward/group_bin_occupancy": 0.836328125,
|
|
"signal/brier_reward/group_std_mean": 0.158928182721138,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015337484329938889,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015337484329938889,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027809354942291975,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.708203125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004555220529437065,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.9778743414208296e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.9778743414208296e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16433233320713042,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86171875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21237687766551971,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002941548731178045,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002941548731178045,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16424158215522766,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86171875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2122596561908722,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029399242252111436,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029399242252111436,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16389403641223907,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.862109375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2118108332157135,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002933703176677227,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002933703176677227,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15368525087833404,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.858203125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19865911304950715,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027509658131748436,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027509658131748436,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09549619555473328,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.87265625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12401848435401916,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017093818169087172,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017093818169087172,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16433233320713042,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86171875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21237687766551971,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002941548731178045,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002941548731178045,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007696983031928539,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.880078125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01010540798306465,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009621228789910674,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009621228789910674,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2615818977355957,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7359375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33764955401420593,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03269773721694946,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03269773721694946,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28347549862131505,
|
|
"calibration/batch_distribution_entropy": 0.980011133919852,
|
|
"calibration/batch_entropy_100bins": 0.9688122443099946,
|
|
"calibration/batch_entropy_10bins": 0.980011133919852,
|
|
"calibration/batch_entropy_50bins": 0.9770535189448722,
|
|
"calibration/batch_uniqueness": 0.9536908440564685,
|
|
"calibration/buffer_distribution_entropy": 0.9983131546794096,
|
|
"calibration/buffer_entropy_100bins": 0.9986737401524538,
|
|
"calibration/buffer_entropy_10bins": 0.9983131546794096,
|
|
"calibration/buffer_entropy_50bins": 0.9986542446646809,
|
|
"calibration/confidence_entropy": 0.47803387557048865,
|
|
"calibration/coverage@0%": 0.0633347602739726,
|
|
"calibration/coverage@1%": 0.0633347602739726,
|
|
"calibration/coverage@10%": 0.2251460066046967,
|
|
"calibration/coverage@15%": 0.3561093444227006,
|
|
"calibration/coverage@20%": 0.48078828277886493,
|
|
"calibration/coverage@25%": 0.5538772015655578,
|
|
"calibration/coverage@30%": 0.628125,
|
|
"calibration/coverage@5%": 0.11884326076320939,
|
|
"calibration/ece": 0.12117534786829438,
|
|
"calibration/mean_confidence": 0.4956464571020572,
|
|
"calibration/prompt_uniqueness": 0.8459324800013007,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 899.6,
|
|
"completions/max_terminated_length": 447.2,
|
|
"completions/mean_length": 181.71953125,
|
|
"completions/mean_terminated_length": 181.45542602539064,
|
|
"completions/min_length": 94.0,
|
|
"completions/min_terminated_length": 94.0,
|
|
"epoch": 0.592,
|
|
"grad_norm": 0.0011055340291932225,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 623473770.0,
|
|
"reward": 0.8469202160835266,
|
|
"reward_std": 0.08769658207893372,
|
|
"rewards/accuracy_reward": 0.5228515625,
|
|
"rewards/brier_reward": 0.7960270881652832,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002862662449479103,
|
|
"rewards/frontier_coverage_1": 0.1255490630865097,
|
|
"rewards/frontier_coverage_10": 0.12550750821828843,
|
|
"rewards/frontier_coverage_15": 0.12535116225481033,
|
|
"rewards/frontier_coverage_20": 0.11695131063461303,
|
|
"rewards/frontier_coverage_25": 0.07410136461257935,
|
|
"rewards/frontier_coverage_5": 0.1255490630865097,
|
|
"rewards/frontier_ece_reward": 0.005222787708044052,
|
|
"rewards/frontier_entropy_batch_reward": -0.2153420329093933,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09036865234375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.170703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12226001918315887,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.634375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045184326171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045184326171875,
|
|
"signal/advantage_abs_mean": 0.06837325692176818,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06837325692176818,
|
|
"signal/advantage_pre_scale_std": 0.10427495390176773,
|
|
"signal/advantage_std": 0.10427495390176773,
|
|
"signal/brier_reward/centered_abs_mean": 0.11565729826688767,
|
|
"signal/brier_reward/group_bin_occupancy": 0.840625,
|
|
"signal/brier_reward/group_std_mean": 0.14894945323467254,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014457162283360959,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014457162283360959,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026661295210942625,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.714453125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004355709021911025,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7723716852488e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7723716852488e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16364216804504395,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2090536832809448,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029291946906596423,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029291946906596423,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.163558030128479,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20894888639450074,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002927688602358103,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002927688602358103,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1632261723279953,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.863671875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20853422582149506,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002921748394146562,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002921748394146562,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15006764531135558,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.860546875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19213563203811646,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026862107682973147,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026862107682973147,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08844952881336213,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.880078125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11421704292297363,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015832465374842285,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015832465374842285,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16364216804504395,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2090536832809448,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029291946906596423,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029291946906596423,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007427510805428028,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.890234375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009686007350683212,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009284388506785035,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009284388506785035,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2786764442920685,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.726171875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3537044942378998,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03483455553650856,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03483455553650856,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23715687440798652,
|
|
"calibration/batch_distribution_entropy": 0.9721351094227396,
|
|
"calibration/batch_entropy_100bins": 0.9636682399300932,
|
|
"calibration/batch_entropy_10bins": 0.9721351094227396,
|
|
"calibration/batch_entropy_50bins": 0.9708812273088254,
|
|
"calibration/batch_uniqueness": 0.9519918907307791,
|
|
"calibration/buffer_distribution_entropy": 0.9983129150316932,
|
|
"calibration/buffer_entropy_100bins": 0.9986866998459007,
|
|
"calibration/buffer_entropy_10bins": 0.9983129150316932,
|
|
"calibration/buffer_entropy_50bins": 0.9986616817533553,
|
|
"calibration/confidence_entropy": 0.4812024814334549,
|
|
"calibration/coverage@0%": 0.04922257216242661,
|
|
"calibration/coverage@1%": 0.05976944716242662,
|
|
"calibration/coverage@10%": 0.22308815435420745,
|
|
"calibration/coverage@15%": 0.3262383806262231,
|
|
"calibration/coverage@20%": 0.47001360689823873,
|
|
"calibration/coverage@25%": 0.5852846746575342,
|
|
"calibration/coverage@30%": 0.7067835738747554,
|
|
"calibration/coverage@5%": 0.1296913221624266,
|
|
"calibration/ece": 0.10479527222992176,
|
|
"calibration/mean_confidence": 0.4769995498007412,
|
|
"calibration/prompt_uniqueness": 0.84160999691077,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 663.6,
|
|
"completions/max_terminated_length": 431.6,
|
|
"completions/mean_length": 178.928125,
|
|
"completions/mean_terminated_length": 178.79558715820312,
|
|
"completions/min_length": 87.0,
|
|
"completions/min_terminated_length": 87.0,
|
|
"epoch": 0.608,
|
|
"grad_norm": 0.0008596270345151424,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 640305482.0,
|
|
"reward": 0.8485802054405213,
|
|
"reward_std": 0.08481966853141784,
|
|
"rewards/accuracy_reward": 0.520703125,
|
|
"rewards/brier_reward": 0.8082961440086365,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002439832640811801,
|
|
"rewards/frontier_coverage_1": 0.14013661593198776,
|
|
"rewards/frontier_coverage_10": 0.14008433520793914,
|
|
"rewards/frontier_coverage_15": 0.1397281616926193,
|
|
"rewards/frontier_coverage_20": 0.12882789671421052,
|
|
"rewards/frontier_coverage_25": 0.08048931509256363,
|
|
"rewards/frontier_coverage_5": 0.14013533443212509,
|
|
"rewards/frontier_ece_reward": 0.005031970608979463,
|
|
"rewards/frontier_entropy_batch_reward": -0.216937056183815,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0880615234375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1184210166335106,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04403076171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04403076171875,
|
|
"signal/advantage_abs_mean": 0.06550839766860009,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06550839766860009,
|
|
"signal/advantage_pre_scale_std": 0.09949304610490799,
|
|
"signal/advantage_std": 0.09949304610490799,
|
|
"signal/brier_reward/centered_abs_mean": 0.11575733423233033,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84140625,
|
|
"signal/brier_reward/group_std_mean": 0.1480691760778427,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01446966677904129,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01446966677904129,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002259706752374768,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.721484375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0036888211499899624,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.04487487685401e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.04487487685401e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16878078281879424,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21479279398918152,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030211757868528364,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030211757868528364,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16870121657848358,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21468909978866577,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030197515618056057,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030197515618056057,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16824153959751129,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2140854448080063,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030115234199911355,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030115234199911355,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1515140563249588,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19256215989589692,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027121015824377536,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027121015824377536,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08549174815416336,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.89453125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10916633754968644,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015303022461012005,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015303022461012005,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16878008842468262,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21479184925556183,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030211633536964657,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030211633536964657,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006747147906571627,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00879486370831728,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008433934883214534,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008433934883214534,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2829363703727722,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7359375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3577865481376648,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035367046296596524,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035367046296596524,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24532003825815796,
|
|
"calibration/batch_distribution_entropy": 0.981679767990388,
|
|
"calibration/batch_entropy_100bins": 0.9709472161360031,
|
|
"calibration/batch_entropy_10bins": 0.981679767990388,
|
|
"calibration/batch_entropy_50bins": 0.9798056822605836,
|
|
"calibration/batch_uniqueness": 0.95435791015625,
|
|
"calibration/buffer_distribution_entropy": 0.9985313752795584,
|
|
"calibration/buffer_entropy_100bins": 0.9988054011236678,
|
|
"calibration/buffer_entropy_10bins": 0.9985313752795584,
|
|
"calibration/buffer_entropy_50bins": 0.9987929314111103,
|
|
"calibration/confidence_entropy": 0.5201001362906903,
|
|
"calibration/coverage@0%": 0.03359375,
|
|
"calibration/coverage@1%": 0.03359375,
|
|
"calibration/coverage@10%": 0.201953125,
|
|
"calibration/coverage@15%": 0.341796875,
|
|
"calibration/coverage@20%": 0.436328125,
|
|
"calibration/coverage@25%": 0.52265625,
|
|
"calibration/coverage@30%": 0.613671875,
|
|
"calibration/coverage@5%": 0.112109375,
|
|
"calibration/ece": 0.10123137926063848,
|
|
"calibration/mean_confidence": 0.49137512856978677,
|
|
"calibration/prompt_uniqueness": 0.84775390625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 422.8,
|
|
"completions/max_terminated_length": 422.8,
|
|
"completions/mean_length": 180.31982421875,
|
|
"completions/mean_terminated_length": 180.31982421875,
|
|
"completions/min_length": 93.8,
|
|
"completions/min_terminated_length": 93.8,
|
|
"epoch": 0.624,
|
|
"grad_norm": 0.001026191283017397,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 657495861.0,
|
|
"reward": 0.8518246412277222,
|
|
"reward_std": 0.08756706416606903,
|
|
"rewards/accuracy_reward": 0.52275390625,
|
|
"rewards/brier_reward": 0.8068322658538818,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002647676505148411,
|
|
"rewards/frontier_coverage_1": 0.12973806113004685,
|
|
"rewards/frontier_coverage_10": 0.12968083024024962,
|
|
"rewards/frontier_coverage_15": 0.12917735427618027,
|
|
"rewards/frontier_coverage_20": 0.11514810025691986,
|
|
"rewards/frontier_coverage_25": 0.06962493434548378,
|
|
"rewards/frontier_coverage_5": 0.12973549515008925,
|
|
"rewards/frontier_ece_reward": 0.004124377947300672,
|
|
"rewards/frontier_entropy_batch_reward": -0.18768059611320495,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.093621826171875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12246521413326264,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0468109130859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0468109130859375,
|
|
"signal/advantage_abs_mean": 0.06893313750624656,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06893313750624656,
|
|
"signal/advantage_pre_scale_std": 0.10512781888246536,
|
|
"signal/advantage_std": 0.10512781888246536,
|
|
"signal/brier_reward/centered_abs_mean": 0.10718954056501388,
|
|
"signal/brier_reward/group_bin_occupancy": 0.858984375,
|
|
"signal/brier_reward/group_std_mean": 0.1384373813867569,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013398692570626735,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013398692570626735,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022487165872007607,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.728515625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035695353988558056,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0252025792142375e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0252025792142375e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15328652858734132,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.877734375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19674740433692933,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002743828808888793,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002743828808888793,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1532078802585602,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.877734375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19664531350135803,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002742420881986618,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002742420881986618,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1525299906730652,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8765625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1957621306180954,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027302867732942105,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027302867732942105,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.129715932905674,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88046875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16696780920028687,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023219150956720115,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023219150956720115,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06962908133864402,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.90078125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09048426896333694,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001246360526420176,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001246360526420176,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15328298211097718,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.877734375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19674279391765595,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002743765339255333,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002743765339255333,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00600477633997798,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.878515625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007920240703970193,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007505970424972475,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007505970424972475,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2654553234577179,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73515625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3388149976730347,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033181915432214736,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033181915432214736,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24430388293357136,
|
|
"calibration/batch_distribution_entropy": 0.9791797972652356,
|
|
"calibration/batch_entropy_100bins": 0.9679413992157719,
|
|
"calibration/batch_entropy_10bins": 0.9791797972652356,
|
|
"calibration/batch_entropy_50bins": 0.975538565518886,
|
|
"calibration/batch_uniqueness": 0.9525665283203125,
|
|
"calibration/buffer_distribution_entropy": 0.9987113525336898,
|
|
"calibration/buffer_entropy_100bins": 0.998895678573958,
|
|
"calibration/buffer_entropy_10bins": 0.9987113525336898,
|
|
"calibration/buffer_entropy_50bins": 0.9989227369806599,
|
|
"calibration/confidence_entropy": 0.5174565045374463,
|
|
"calibration/coverage@0%": 0.07578125,
|
|
"calibration/coverage@1%": 0.111328125,
|
|
"calibration/coverage@10%": 0.309375,
|
|
"calibration/coverage@15%": 0.365234375,
|
|
"calibration/coverage@20%": 0.4078125,
|
|
"calibration/coverage@25%": 0.548828125,
|
|
"calibration/coverage@30%": 0.664453125,
|
|
"calibration/coverage@5%": 0.22578125,
|
|
"calibration/ece": 0.16490499070074144,
|
|
"calibration/mean_confidence": 0.5123002672000428,
|
|
"calibration/prompt_uniqueness": 0.852099609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 622.0,
|
|
"completions/max_terminated_length": 479.4,
|
|
"completions/mean_length": 184.83974609375,
|
|
"completions/mean_terminated_length": 183.78404846191407,
|
|
"completions/min_length": 97.4,
|
|
"completions/min_terminated_length": 97.4,
|
|
"epoch": 0.64,
|
|
"grad_norm": 0.0008454410126432776,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0014,
|
|
"num_tokens": 674731308.0,
|
|
"reward": 0.8708350419998169,
|
|
"reward_std": 0.08193039745092393,
|
|
"rewards/accuracy_reward": 0.56953125,
|
|
"rewards/brier_reward": 0.8082751274108887,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.002218431932851672,
|
|
"rewards/frontier_coverage_1": 0.09933431595563888,
|
|
"rewards/frontier_coverage_10": 0.09924045875668526,
|
|
"rewards/frontier_coverage_15": 0.09875798150897026,
|
|
"rewards/frontier_coverage_20": 0.08629776164889336,
|
|
"rewards/frontier_coverage_25": 0.056573347002267835,
|
|
"rewards/frontier_coverage_5": 0.09933282062411308,
|
|
"rewards/frontier_ece_reward": 0.003817522618919611,
|
|
"rewards/frontier_entropy_batch_reward": -0.19735628366470337,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07532958984375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10438980013132096,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.037664794921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.037664794921875,
|
|
"signal/advantage_abs_mean": 0.06335262283682823,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06335262283682823,
|
|
"signal/advantage_pre_scale_std": 0.09740418940782547,
|
|
"signal/advantage_std": 0.09740418940782547,
|
|
"signal/brier_reward/centered_abs_mean": 0.10196209698915482,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84765625,
|
|
"signal/brier_reward/group_std_mean": 0.13130579739809037,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012745262123644352,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012745262123644352,
|
|
"signal/format_reward/centered_abs_mean": 0.001171875,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.001374816708266735,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005859375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0005859375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020356971537694333,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.718359375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003529385570436716,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.643897762231063e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.643897762231063e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.139412322640419,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18100157380104065,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002495480561628938,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002495480561628938,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13926379680633544,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18081148266792296,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002492821915075183,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002492821915075183,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13798445761203765,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17917191088199616,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024699217174202204,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024699217174202204,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11161820888519287,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.866015625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1451725423336029,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019979658536612988,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019979658536612988,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.061739873886108396,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.89921875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08028749227523804,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011051436886191368,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011051436886191368,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13940848410129547,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1809966504573822,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002495411830022931,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002495411830022931,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005760820955038071,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.866796875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00775869581848383,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007201026193797589,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007201026193797589,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2656015157699585,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34027169942855834,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03320018947124481,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03320018947124481,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"eval_calibration/aurc": 0.41854034890006203,
|
|
"eval_calibration/batch_distribution_entropy": 0.9258790022452184,
|
|
"eval_calibration/batch_entropy_100bins": 0.7155226945000874,
|
|
"eval_calibration/batch_entropy_10bins": 0.9258790022452184,
|
|
"eval_calibration/batch_entropy_50bins": 0.7976394251687033,
|
|
"eval_calibration/batch_uniqueness": 0.8994140625,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9987820695130332,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9989213084455153,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9987820695130332,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9989669617425297,
|
|
"eval_calibration/confidence_entropy": 0.49184587531394314,
|
|
"eval_calibration/coverage@0%": 0.0625,
|
|
"eval_calibration/coverage@1%": 0.0625,
|
|
"eval_calibration/coverage@10%": 0.1328125,
|
|
"eval_calibration/coverage@15%": 0.171875,
|
|
"eval_calibration/coverage@20%": 0.1953125,
|
|
"eval_calibration/coverage@25%": 0.2265625,
|
|
"eval_calibration/coverage@30%": 0.2578125,
|
|
"eval_calibration/coverage@5%": 0.0625,
|
|
"eval_calibration/ece": 0.17095216040466216,
|
|
"eval_calibration/mean_confidence": 0.45492840413455565,
|
|
"eval_calibration/prompt_uniqueness": 0.8994140625,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 295.25,
|
|
"eval_completions/max_terminated_length": 295.25,
|
|
"eval_completions/mean_length": 179.74158096313477,
|
|
"eval_completions/mean_terminated_length": 179.74158096313477,
|
|
"eval_completions/min_length": 109.75,
|
|
"eval_completions/min_terminated_length": 109.75,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 674731308.0,
|
|
"eval_reward": 0.7049643099308014,
|
|
"eval_reward_std": 0.22184203192591667,
|
|
"eval_rewards/accuracy_reward": 0.423828125,
|
|
"eval_rewards/brier_reward": 0.7956392019987106,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0033663903595879674,
|
|
"eval_rewards/frontier_coverage_1": 0.19531626999378204,
|
|
"eval_rewards/frontier_coverage_10": 0.1952117159962654,
|
|
"eval_rewards/frontier_coverage_15": 0.19375700131058693,
|
|
"eval_rewards/frontier_coverage_20": 0.15371991135179996,
|
|
"eval_rewards/frontier_coverage_25": 0.08286740258336067,
|
|
"eval_rewards/frontier_coverage_5": 0.19531207531690598,
|
|
"eval_rewards/frontier_ece_reward": 0.003727212024386972,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 17.1514,
|
|
"eval_samples_per_second": 29.152,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4744873046875,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49458901584148407,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23724365234375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23724365234375,
|
|
"eval_signal/advantage_abs_mean": 0.2057364284992218,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2057364284992218,
|
|
"eval_signal/advantage_pre_scale_std": 0.21934344619512558,
|
|
"eval_signal/advantage_std": 0.21934344619512558,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.1897713765501976,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.8984375,
|
|
"eval_signal/brier_reward/group_std_mean": 0.24221712350845337,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0237214220687747,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0237214220687747,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004152168636210263,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6640625,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007893728208728135,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.432381426042411e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.432381426042411e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3731478080153465,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 1.0,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.45107389986515045,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006679345387965441,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006679345387965441,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.37288998067379,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 1.0,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.45077458769083023,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006674730451777577,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006674730451777577,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.36930492520332336,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 1.0,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4466145858168602,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0066105579026043415,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0066105579026043415,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.29113033413887024,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.354678250849247,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005211232579313219,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005211232579313219,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.13845044746994972,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.17535366117954254,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002478263049852103,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002478263049852103,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3731374442577362,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 1.0,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4510618671774864,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006679159821942449,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006679159821942449,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.007429954246617854,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8671875,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.010206094710156322,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009287442808272317,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009287442808272317,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.233,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4072680963308123,
|
|
"calibration/batch_distribution_entropy": 0.9719537361453303,
|
|
"calibration/batch_entropy_100bins": 0.9645036495344895,
|
|
"calibration/batch_entropy_10bins": 0.9719537361453303,
|
|
"calibration/batch_entropy_50bins": 0.972271453991881,
|
|
"calibration/batch_uniqueness": 0.9524566650390625,
|
|
"calibration/buffer_distribution_entropy": 0.9988068526161396,
|
|
"calibration/buffer_entropy_100bins": 0.9989106275315882,
|
|
"calibration/buffer_entropy_10bins": 0.9988068526161396,
|
|
"calibration/buffer_entropy_50bins": 0.9989733950683586,
|
|
"calibration/confidence_entropy": 0.5230128680468962,
|
|
"calibration/coverage@0%": 0.003515625,
|
|
"calibration/coverage@1%": 0.003515625,
|
|
"calibration/coverage@10%": 0.009765625,
|
|
"calibration/coverage@15%": 0.019140625,
|
|
"calibration/coverage@20%": 0.0875,
|
|
"calibration/coverage@25%": 0.137890625,
|
|
"calibration/coverage@30%": 0.26171875,
|
|
"calibration/coverage@5%": 0.003515625,
|
|
"calibration/ece": 0.1075798181494427,
|
|
"calibration/mean_confidence": 0.4706070682994466,
|
|
"calibration/prompt_uniqueness": 0.853369140625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 614.6,
|
|
"completions/max_terminated_length": 390.6,
|
|
"completions/mean_length": 182.33017578125,
|
|
"completions/mean_terminated_length": 182.19835510253907,
|
|
"completions/min_length": 91.2,
|
|
"completions/min_terminated_length": 91.2,
|
|
"epoch": 0.656,
|
|
"grad_norm": 0.001009272993542254,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 691454913.0,
|
|
"reward": 0.8378146886825562,
|
|
"reward_std": 0.09044925570487976,
|
|
"rewards/accuracy_reward": 0.50419921875,
|
|
"rewards/brier_reward": 0.7853815197944641,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.003076691273599863,
|
|
"rewards/frontier_coverage_1": 0.1220921441912651,
|
|
"rewards/frontier_coverage_10": 0.1220327764749527,
|
|
"rewards/frontier_coverage_15": 0.12089861333370208,
|
|
"rewards/frontier_coverage_20": 0.09999236166477203,
|
|
"rewards/frontier_coverage_25": 0.05890063121914864,
|
|
"rewards/frontier_coverage_5": 0.12208605259656906,
|
|
"rewards/frontier_ece_reward": 0.003139182738959789,
|
|
"rewards/frontier_entropy_batch_reward": -0.19447652399539947,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.098480224609375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12976800352334977,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0492401123046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0492401123046875,
|
|
"signal/advantage_abs_mean": 0.07178077697753907,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07178077697753907,
|
|
"signal/advantage_pre_scale_std": 0.10841633677482605,
|
|
"signal/advantage_std": 0.10841633677482605,
|
|
"signal/brier_reward/centered_abs_mean": 0.11750788986682892,
|
|
"signal/brier_reward/group_bin_occupancy": 0.852734375,
|
|
"signal/brier_reward/group_std_mean": 0.15079601109027863,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014688486233353615,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014688486233353615,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026406456716358663,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.719921875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004412023955956102,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7267557238228616e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7267557238228616e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16164307296276093,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2075218141078949,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028934108559042215,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028934108559042215,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16153694093227386,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.207388174533844,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002891511144116521,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002891511144116521,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15967210829257966,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.866796875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20502502024173735,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028581305872648955,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028581305872648955,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.12663253098726274,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.865234375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16340535879135132,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002266722172498703,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002266722172498703,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0673256479203701,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.896875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08757460862398148,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012051290133967996,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012051290133967996,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1616332322359085,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20750951170921325,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028932347893714907,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028932347893714907,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0055423608049750325,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007337391003966331,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006927951006218791,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006927951006218791,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.266719377040863,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.740625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3390504062175751,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03333992213010788,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03333992213010788,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30184251919884036,
|
|
"calibration/batch_distribution_entropy": 0.9766400147465495,
|
|
"calibration/batch_entropy_100bins": 0.9661916960682403,
|
|
"calibration/batch_entropy_10bins": 0.9766400147465495,
|
|
"calibration/batch_entropy_50bins": 0.975390516981222,
|
|
"calibration/batch_uniqueness": 0.953192138671875,
|
|
"calibration/buffer_distribution_entropy": 0.9989337853009663,
|
|
"calibration/buffer_entropy_100bins": 0.9989487435236883,
|
|
"calibration/buffer_entropy_10bins": 0.9989337853009663,
|
|
"calibration/buffer_entropy_50bins": 0.9990317545857466,
|
|
"calibration/confidence_entropy": 0.4864500476998418,
|
|
"calibration/coverage@0%": 0.019140625,
|
|
"calibration/coverage@1%": 0.019140625,
|
|
"calibration/coverage@10%": 0.166796875,
|
|
"calibration/coverage@15%": 0.203515625,
|
|
"calibration/coverage@20%": 0.25,
|
|
"calibration/coverage@25%": 0.29921875,
|
|
"calibration/coverage@30%": 0.41796875,
|
|
"calibration/coverage@5%": 0.112890625,
|
|
"calibration/ece": 0.14628865248087672,
|
|
"calibration/mean_confidence": 0.4938824967603755,
|
|
"calibration/prompt_uniqueness": 0.848876953125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 387.6,
|
|
"completions/max_terminated_length": 387.6,
|
|
"completions/mean_length": 181.459765625,
|
|
"completions/mean_terminated_length": 181.459765625,
|
|
"completions/min_length": 90.0,
|
|
"completions/min_terminated_length": 90.0,
|
|
"epoch": 0.672,
|
|
"grad_norm": 0.0008657427970319986,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 708226501.0,
|
|
"reward": 0.8453529119491577,
|
|
"reward_std": 0.08392495959997177,
|
|
"rewards/accuracy_reward": 0.51845703125,
|
|
"rewards/brier_reward": 0.7956361413002014,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002716613910160959,
|
|
"rewards/frontier_coverage_1": 0.1360724911093712,
|
|
"rewards/frontier_coverage_10": 0.13601431995630264,
|
|
"rewards/frontier_coverage_15": 0.13472481966018676,
|
|
"rewards/frontier_coverage_20": 0.11215179413557053,
|
|
"rewards/frontier_coverage_25": 0.0678616002202034,
|
|
"rewards/frontier_coverage_5": 0.13606539219617844,
|
|
"rewards/frontier_ece_reward": 0.0036563334055244924,
|
|
"rewards/frontier_entropy_batch_reward": -0.2134263336658478,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.093804931640625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12641526907682418,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0469024658203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0469024658203125,
|
|
"signal/advantage_abs_mean": 0.06465236023068428,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06465236023068428,
|
|
"signal/advantage_pre_scale_std": 0.0998497799038887,
|
|
"signal/advantage_std": 0.0998497799038887,
|
|
"signal/brier_reward/centered_abs_mean": 0.11606302261352539,
|
|
"signal/brier_reward/group_bin_occupancy": 0.844140625,
|
|
"signal/brier_reward/group_std_mean": 0.14761213660240174,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014507877826690673,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014507877826690673,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023735316237434743,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.742578125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003954212227836251,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2486215534154326e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2486215534154326e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1699829190969467,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.851171875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21657621562480928,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003042694181203842,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003042694181203842,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1698471039533615,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.851171875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21641322374343872,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003040262870490551,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003040262870490551,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16724947690963746,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.85,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.213279390335083,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029937655199319124,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029937655199319124,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.12853406816720964,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8453125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1650033712387085,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002300759730860591,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002300759730860591,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07163915932178497,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.901953125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09214308261871337,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001282340893521905,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001282340893521905,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16996634304523467,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.851171875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2165563225746155,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003042397554963827,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003042397554963827,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0058284570463001725,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.867578125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007644351571798325,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007285571307875216,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007285571307875216,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2624157965183258,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.730078125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3384029269218445,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032801974564790726,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032801974564790726,
|
|
"step": 210
|
|
},
|
|
{
|
|
"calibration/aurc": 0.32277610924710637,
|
|
"calibration/batch_distribution_entropy": 0.9701296713733163,
|
|
"calibration/batch_entropy_100bins": 0.9644057869188065,
|
|
"calibration/batch_entropy_10bins": 0.9701296713733163,
|
|
"calibration/batch_entropy_50bins": 0.9714994429885904,
|
|
"calibration/batch_uniqueness": 0.951641845703125,
|
|
"calibration/buffer_distribution_entropy": 0.9989417647738268,
|
|
"calibration/buffer_entropy_100bins": 0.9989298633022411,
|
|
"calibration/buffer_entropy_10bins": 0.9989417647738268,
|
|
"calibration/buffer_entropy_50bins": 0.9990118814717297,
|
|
"calibration/confidence_entropy": 0.513002509873567,
|
|
"calibration/coverage@0%": 0.01171875,
|
|
"calibration/coverage@1%": 0.01171875,
|
|
"calibration/coverage@10%": 0.151953125,
|
|
"calibration/coverage@15%": 0.240234375,
|
|
"calibration/coverage@20%": 0.323046875,
|
|
"calibration/coverage@25%": 0.4640625,
|
|
"calibration/coverage@30%": 0.544921875,
|
|
"calibration/coverage@5%": 0.037890625,
|
|
"calibration/ece": 0.12407527470589289,
|
|
"calibration/mean_confidence": 0.4836047739947819,
|
|
"calibration/prompt_uniqueness": 0.8462890625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 860.4,
|
|
"completions/max_terminated_length": 406.2,
|
|
"completions/mean_length": 185.49267578125,
|
|
"completions/mean_terminated_length": 185.22888793945313,
|
|
"completions/min_length": 92.2,
|
|
"completions/min_terminated_length": 92.2,
|
|
"epoch": 0.688,
|
|
"grad_norm": 0.0011569778434932232,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 725079866.0,
|
|
"reward": 0.8533730626106262,
|
|
"reward_std": 0.09096147418022156,
|
|
"rewards/accuracy_reward": 0.5375,
|
|
"rewards/brier_reward": 0.7997807264328003,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0025234234519302843,
|
|
"rewards/frontier_coverage_1": 0.1166018046438694,
|
|
"rewards/frontier_coverage_10": 0.11651684194803238,
|
|
"rewards/frontier_coverage_15": 0.11543264091014863,
|
|
"rewards/frontier_coverage_20": 0.08750456124544144,
|
|
"rewards/frontier_coverage_25": 0.05482863634824753,
|
|
"rewards/frontier_coverage_5": 0.11660146117210388,
|
|
"rewards/frontier_ece_reward": 0.002992427349090576,
|
|
"rewards/frontier_entropy_batch_reward": -0.21163803935050965,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0998291015625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.13668281584978104,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04991455078125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04991455078125,
|
|
"signal/advantage_abs_mean": 0.06996657401323318,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06996657401323318,
|
|
"signal/advantage_pre_scale_std": 0.10759487152099609,
|
|
"signal/advantage_std": 0.10759487152099609,
|
|
"signal/brier_reward/centered_abs_mean": 0.10977463126182556,
|
|
"signal/brier_reward/group_bin_occupancy": 0.861328125,
|
|
"signal/brier_reward/group_std_mean": 0.14089445173740386,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013721828907728195,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013721828907728195,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020935308886691926,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.746484375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0034752024803310633,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.747420341824181e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.747420341824181e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16087363958358764,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.866796875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20536437928676604,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002879638038575649,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002879638038575649,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16079167425632476,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.866015625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20525703132152556,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002878170693293214,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002878170693293214,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15650815665721893,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19971639513969422,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028014959301799537,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028014959301799537,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1123495414853096,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8609375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14411205649375916,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020110567333176733,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020110567333176733,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06338529288768768,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9078125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0813615933060646,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011345966951921583,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011345966951921583,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16086728274822235,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.866796875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20535596311092377,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002879524324089289,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002879524324089289,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0054166271351277825,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.882421875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00712386667728424,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006770783918909728,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006770783918909728,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2808054625988007,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.737109375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35399608612060546,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035100682824850085,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035100682824850085,
|
|
"step": 215
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2567679202323507,
|
|
"calibration/batch_distribution_entropy": 0.9753718666465045,
|
|
"calibration/batch_entropy_100bins": 0.9653751735288918,
|
|
"calibration/batch_entropy_10bins": 0.9753718666465045,
|
|
"calibration/batch_entropy_50bins": 0.9753353539912812,
|
|
"calibration/batch_uniqueness": 0.9522705078125,
|
|
"calibration/buffer_distribution_entropy": 0.9989540322458701,
|
|
"calibration/buffer_entropy_100bins": 0.9989665872140143,
|
|
"calibration/buffer_entropy_10bins": 0.9989540322458701,
|
|
"calibration/buffer_entropy_50bins": 0.9990419447527051,
|
|
"calibration/confidence_entropy": 0.4933685141452767,
|
|
"calibration/coverage@0%": 0.008203125,
|
|
"calibration/coverage@1%": 0.008203125,
|
|
"calibration/coverage@10%": 0.11015625,
|
|
"calibration/coverage@15%": 0.191015625,
|
|
"calibration/coverage@20%": 0.31171875,
|
|
"calibration/coverage@25%": 0.5234375,
|
|
"calibration/coverage@30%": 0.6765625,
|
|
"calibration/coverage@5%": 0.024609375,
|
|
"calibration/ece": 0.10514110855139244,
|
|
"calibration/mean_confidence": 0.5225538825879033,
|
|
"calibration/prompt_uniqueness": 0.837548828125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 687.4,
|
|
"completions/max_terminated_length": 469.4,
|
|
"completions/mean_length": 189.19560546875,
|
|
"completions/mean_terminated_length": 189.06415710449218,
|
|
"completions/min_length": 97.0,
|
|
"completions/min_terminated_length": 97.0,
|
|
"epoch": 0.704,
|
|
"grad_norm": 0.00079206726513803,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 741883373.0,
|
|
"reward": 0.8564110398292542,
|
|
"reward_std": 0.08603468835353852,
|
|
"rewards/accuracy_reward": 0.5404296875,
|
|
"rewards/brier_reward": 0.8099352717399597,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002810010826215148,
|
|
"rewards/frontier_coverage_1": 0.11663352549076081,
|
|
"rewards/frontier_coverage_10": 0.11659030914306641,
|
|
"rewards/frontier_coverage_15": 0.1124075010418892,
|
|
"rewards/frontier_coverage_20": 0.08771874606609345,
|
|
"rewards/frontier_coverage_25": 0.0565977543592453,
|
|
"rewards/frontier_coverage_5": 0.11662895604968071,
|
|
"rewards/frontier_ece_reward": 0.0033508235588669776,
|
|
"rewards/frontier_entropy_batch_reward": -0.20978534519672393,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.082080078125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16484375,
|
|
"signal/accuracy_reward/group_std_mean": 0.10941672474145889,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0410400390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0410400390625,
|
|
"signal/advantage_abs_mean": 0.06718932390213013,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06718932390213013,
|
|
"signal/advantage_pre_scale_std": 0.10292920172214508,
|
|
"signal/advantage_std": 0.10292920172214508,
|
|
"signal/brier_reward/centered_abs_mean": 0.10521638691425324,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8578125,
|
|
"signal/brier_reward/group_std_mean": 0.13554594218730925,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013152048364281655,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013152048364281655,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024594481103122233,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.74765625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004012216068804264,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.402412014314905e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.402412014314905e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14190192222595216,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.874609375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18161689043045043,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025400443468242885,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025400443468242885,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14177892506122589,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.874609375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18146247267723084,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002537842746824026,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002537842746824026,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1324725031852722,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16975606381893157,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023712576366961002,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023712576366961002,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09445251375436783,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12184825539588928,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001690700021572411,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001690700021572411,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05518615916371346,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.925,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07120932638645172,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009878322365693749,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009878322365693749,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1418927103281021,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.874609375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18160516917705535,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025398793630301954,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025398793630301954,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005274960119277239,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.878515625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006980370450764895,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006593700149096548,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006593700149096548,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28154911994934084,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.733203125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3582507610321045,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035193639993667605,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035193639993667605,
|
|
"step": 220
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23126784031942532,
|
|
"calibration/batch_distribution_entropy": 0.9914821225669396,
|
|
"calibration/batch_entropy_100bins": 0.9745556431012833,
|
|
"calibration/batch_entropy_10bins": 0.9914821225669396,
|
|
"calibration/batch_entropy_50bins": 0.9834161123379925,
|
|
"calibration/batch_uniqueness": 0.9553955078125,
|
|
"calibration/buffer_distribution_entropy": 0.9990940525630616,
|
|
"calibration/buffer_entropy_100bins": 0.9990462171221427,
|
|
"calibration/buffer_entropy_10bins": 0.9990940525630616,
|
|
"calibration/buffer_entropy_50bins": 0.9991333015296009,
|
|
"calibration/confidence_entropy": 0.4974279364345352,
|
|
"calibration/coverage@0%": 0.08125,
|
|
"calibration/coverage@1%": 0.1296875,
|
|
"calibration/coverage@10%": 0.297265625,
|
|
"calibration/coverage@15%": 0.3734375,
|
|
"calibration/coverage@20%": 0.453125,
|
|
"calibration/coverage@25%": 0.551171875,
|
|
"calibration/coverage@30%": 0.6421875,
|
|
"calibration/coverage@5%": 0.225390625,
|
|
"calibration/ece": 0.13818892162141455,
|
|
"calibration/mean_confidence": 0.528279888360925,
|
|
"calibration/prompt_uniqueness": 0.8396484375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 392.4,
|
|
"completions/max_terminated_length": 392.4,
|
|
"completions/mean_length": 193.5751953125,
|
|
"completions/mean_terminated_length": 193.5751953125,
|
|
"completions/min_length": 102.0,
|
|
"completions/min_terminated_length": 102.0,
|
|
"epoch": 0.72,
|
|
"grad_norm": 0.0008994463132694364,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 758875439.0,
|
|
"reward": 0.867469334602356,
|
|
"reward_std": 0.08709415346384049,
|
|
"rewards/accuracy_reward": 0.563671875,
|
|
"rewards/brier_reward": 0.8105340003967285,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002341646375134587,
|
|
"rewards/frontier_coverage_1": 0.10286648273468017,
|
|
"rewards/frontier_coverage_10": 0.10283031612634659,
|
|
"rewards/frontier_coverage_15": 0.09793300032615662,
|
|
"rewards/frontier_coverage_20": 0.07525258213281631,
|
|
"rewards/frontier_coverage_25": 0.05333108454942703,
|
|
"rewards/frontier_coverage_5": 0.10286374539136886,
|
|
"rewards/frontier_ece_reward": 0.003090843977406621,
|
|
"rewards/frontier_entropy_batch_reward": -0.20484532713890075,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08983154296875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12185031622648239,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044915771484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044915771484375,
|
|
"signal/advantage_abs_mean": 0.06714712977409362,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06714712977409362,
|
|
"signal/advantage_pre_scale_std": 0.10354482531547546,
|
|
"signal/advantage_std": 0.10354482531547546,
|
|
"signal/brier_reward/centered_abs_mean": 0.10361835062503814,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8609375,
|
|
"signal/brier_reward/group_std_mean": 0.1339139461517334,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012952293828129768,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012952293828129768,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002101215533912182,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.74453125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0033895236440002917,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7611756488331595e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7611756488331595e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14568218886852263,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.873046875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18670837283134462,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002607711125165224,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002607711125165224,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14543514251708983,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.873828125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1864002525806427,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026032889261841776,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026032889261841776,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1325514554977417,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17030819058418273,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023726709187030792,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023726709187030792,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0897199884057045,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8765625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11619268357753754,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016059877583757044,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016059877583757044,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.054404760152101515,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.928515625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06992583870887756,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009738451801240445,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009738451801240445,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14567132890224457,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.873046875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18669503033161164,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026075166650116445,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026075166650116445,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0052463172003626825,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.901953125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006824824120849371,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006557896500453353,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006557896500453353,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2725887656211853,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.736328125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3432928442955017,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03407359570264816,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03407359570264816,
|
|
"step": 225
|
|
},
|
|
{
|
|
"calibration/aurc": 0.251207175508105,
|
|
"calibration/batch_distribution_entropy": 0.9803575481156237,
|
|
"calibration/batch_entropy_100bins": 0.96926214899048,
|
|
"calibration/batch_entropy_10bins": 0.9803575481156237,
|
|
"calibration/batch_entropy_50bins": 0.9778726337355224,
|
|
"calibration/batch_uniqueness": 0.9536702349762987,
|
|
"calibration/buffer_distribution_entropy": 0.9991889846010944,
|
|
"calibration/buffer_entropy_100bins": 0.9990874643844343,
|
|
"calibration/buffer_entropy_10bins": 0.9991889846010944,
|
|
"calibration/buffer_entropy_50bins": 0.9991898926831168,
|
|
"calibration/confidence_entropy": 0.48241655738057865,
|
|
"calibration/coverage@0%": 0.022267153864970645,
|
|
"calibration/coverage@1%": 0.022267153864970645,
|
|
"calibration/coverage@10%": 0.15632491438356164,
|
|
"calibration/coverage@15%": 0.21420162671232879,
|
|
"calibration/coverage@20%": 0.4169658145792564,
|
|
"calibration/coverage@25%": 0.5560673006360078,
|
|
"calibration/coverage@30%": 0.6850255320450097,
|
|
"calibration/coverage@5%": 0.1152718321917808,
|
|
"calibration/ece": 0.13081064207628812,
|
|
"calibration/mean_confidence": 0.533058629028637,
|
|
"calibration/prompt_uniqueness": 0.8375652904689126,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 881.6,
|
|
"completions/max_terminated_length": 427.8,
|
|
"completions/mean_length": 198.65859375,
|
|
"completions/mean_terminated_length": 198.26717834472657,
|
|
"completions/min_length": 100.8,
|
|
"completions/min_terminated_length": 100.8,
|
|
"epoch": 0.736,
|
|
"grad_norm": 0.0006931371171958745,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 775849287.0,
|
|
"reward": 0.8676259756088257,
|
|
"reward_std": 0.08426170200109481,
|
|
"rewards/accuracy_reward": 0.5630859375,
|
|
"rewards/brier_reward": 0.8032041311264038,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0026727572083473207,
|
|
"rewards/frontier_coverage_1": 0.10416304171085358,
|
|
"rewards/frontier_coverage_10": 0.10409975200891494,
|
|
"rewards/frontier_coverage_15": 0.09961197376251221,
|
|
"rewards/frontier_coverage_20": 0.07543607577681541,
|
|
"rewards/frontier_coverage_25": 0.0561057448387146,
|
|
"rewards/frontier_coverage_5": 0.10416053682565689,
|
|
"rewards/frontier_ece_reward": 0.002644325466826558,
|
|
"rewards/frontier_entropy_batch_reward": -0.19347001612186432,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0850830078125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.114646577835083,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04254150390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04254150390625,
|
|
"signal/advantage_abs_mean": 0.06527714878320694,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06527714878320694,
|
|
"signal/advantage_pre_scale_std": 0.10004038214683533,
|
|
"signal/advantage_std": 0.10004038214683533,
|
|
"signal/brier_reward/centered_abs_mean": 0.10927082747220992,
|
|
"signal/brier_reward/group_bin_occupancy": 0.831640625,
|
|
"signal/brier_reward/group_std_mean": 0.14201997220516205,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01365885343402624,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01365885343402624,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023929367307573557,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0037743649911135433,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.283356502128299e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.283356502128299e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14819615185260773,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.84921875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19186924695968627,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026527110021561384,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026527110021561384,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.147468763589859,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.850390625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1909423440694809,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026396906469017267,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026396906469017267,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1348020002245903,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.840234375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17472992837429047,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024129556957632305,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024129556957632305,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08800848871469498,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11463980823755264,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015753519488498568,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015753519488498568,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.055414053797721866,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.925390625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07170938104391097,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000991911522578448,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000991911522578448,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14818698167800903,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.84921875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19185736775398254,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026525467168539763,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026525467168539763,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005124002322554588,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.89609375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006701454985886812,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006405002903193235,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006405002903193235,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26818968653678893,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734765625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34574413299560547,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033523710817098616,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033523710817098616,
|
|
"step": 230
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2622214479199293,
|
|
"calibration/batch_distribution_entropy": 0.9764551048609474,
|
|
"calibration/batch_entropy_100bins": 0.9666018818532802,
|
|
"calibration/batch_entropy_10bins": 0.9764551048609474,
|
|
"calibration/batch_entropy_50bins": 0.9763242677062596,
|
|
"calibration/batch_uniqueness": 0.9524139404296875,
|
|
"calibration/buffer_distribution_entropy": 0.9991939792089879,
|
|
"calibration/buffer_entropy_100bins": 0.9990739400539542,
|
|
"calibration/buffer_entropy_10bins": 0.9991939792089879,
|
|
"calibration/buffer_entropy_50bins": 0.9991816292648175,
|
|
"calibration/confidence_entropy": 0.4706510873405473,
|
|
"calibration/coverage@0%": 0.010546875,
|
|
"calibration/coverage@1%": 0.010546875,
|
|
"calibration/coverage@10%": 0.100390625,
|
|
"calibration/coverage@15%": 0.246484375,
|
|
"calibration/coverage@20%": 0.447265625,
|
|
"calibration/coverage@25%": 0.584765625,
|
|
"calibration/coverage@30%": 0.669140625,
|
|
"calibration/coverage@5%": 0.03515625,
|
|
"calibration/ece": 0.11216717108532222,
|
|
"calibration/mean_confidence": 0.48983524720107646,
|
|
"calibration/prompt_uniqueness": 0.8369140625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 411.8,
|
|
"completions/max_terminated_length": 411.8,
|
|
"completions/mean_length": 198.97177734375,
|
|
"completions/mean_terminated_length": 198.97177734375,
|
|
"completions/min_length": 105.6,
|
|
"completions/min_terminated_length": 105.6,
|
|
"epoch": 0.752,
|
|
"grad_norm": 0.0007598252850584686,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 793113958.0,
|
|
"reward": 0.8673793315887451,
|
|
"reward_std": 0.0860441878437996,
|
|
"rewards/accuracy_reward": 0.56337890625,
|
|
"rewards/brier_reward": 0.8044471979141236,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002977763069793582,
|
|
"rewards/frontier_coverage_1": 0.10541683062911034,
|
|
"rewards/frontier_coverage_10": 0.10495719313621521,
|
|
"rewards/frontier_coverage_15": 0.09651436656713486,
|
|
"rewards/frontier_coverage_20": 0.07021676413714886,
|
|
"rewards/frontier_coverage_25": 0.050591808184981345,
|
|
"rewards/frontier_coverage_5": 0.10540874376893043,
|
|
"rewards/frontier_ece_reward": 0.0025658421916887166,
|
|
"rewards/frontier_entropy_batch_reward": -0.1974082589149475,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.083087158203125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11437420845031739,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0415435791015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0415435791015625,
|
|
"signal/advantage_abs_mean": 0.06670793667435646,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06670793667435646,
|
|
"signal/advantage_pre_scale_std": 0.10405694842338561,
|
|
"signal/advantage_std": 0.10405694842338561,
|
|
"signal/brier_reward/centered_abs_mean": 0.1071990892291069,
|
|
"signal/brier_reward/group_bin_occupancy": 0.837890625,
|
|
"signal/brier_reward/group_std_mean": 0.13994504809379577,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013399886153638362,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013399886153638362,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00265240459702909,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.725390625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004353985376656056,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7478038322879004e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7478038322879004e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14081787765026094,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18302632570266725,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025206399615854023,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025206399615854023,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1402135133743286,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1822360187768936,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002509821904823184,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002509821904823184,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12741587162017823,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16580144464969634,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022807438392192124,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022807438392192124,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08223778158426284,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10763536989688874,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014720562612637877,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014720562612637877,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.053284359723329545,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.91953125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06900968700647354,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009537900099530816,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009537900099530816,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1408083975315094,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18301377892494203,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025204701349139215,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025204701349139215,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005157566629350185,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.89921875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006740899570286274,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006446958286687732,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006446958286687732,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2672864556312561,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.738671875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3404460310935974,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03341080695390701,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03341080695390701,
|
|
"step": 235
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2681903946291124,
|
|
"calibration/batch_distribution_entropy": 0.9767026425312034,
|
|
"calibration/batch_entropy_100bins": 0.9688933764667013,
|
|
"calibration/batch_entropy_10bins": 0.9767026425312034,
|
|
"calibration/batch_entropy_50bins": 0.9750379284339221,
|
|
"calibration/batch_uniqueness": 0.9525848388671875,
|
|
"calibration/buffer_distribution_entropy": 0.999199839210462,
|
|
"calibration/buffer_entropy_100bins": 0.9990462741781064,
|
|
"calibration/buffer_entropy_10bins": 0.999199839210462,
|
|
"calibration/buffer_entropy_50bins": 0.9991680034614012,
|
|
"calibration/confidence_entropy": 0.5025959777866486,
|
|
"calibration/coverage@0%": 0.042578125,
|
|
"calibration/coverage@1%": 0.061328125,
|
|
"calibration/coverage@10%": 0.226171875,
|
|
"calibration/coverage@15%": 0.2875,
|
|
"calibration/coverage@20%": 0.3734375,
|
|
"calibration/coverage@25%": 0.491015625,
|
|
"calibration/coverage@30%": 0.584765625,
|
|
"calibration/coverage@5%": 0.177734375,
|
|
"calibration/ece": 0.14313831918485717,
|
|
"calibration/mean_confidence": 0.4804080079299814,
|
|
"calibration/prompt_uniqueness": 0.8513671875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 403.4,
|
|
"completions/max_terminated_length": 403.4,
|
|
"completions/mean_length": 200.05576171875,
|
|
"completions/mean_terminated_length": 200.05576171875,
|
|
"completions/min_length": 102.2,
|
|
"completions/min_terminated_length": 102.2,
|
|
"epoch": 0.768,
|
|
"grad_norm": 0.0011492387857288122,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 810095233.0,
|
|
"reward": 0.8437476992607117,
|
|
"reward_std": 0.08492105603218078,
|
|
"rewards/accuracy_reward": 0.50966796875,
|
|
"rewards/brier_reward": 0.810984981060028,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002536864671856165,
|
|
"rewards/frontier_coverage_1": 0.14460064321756363,
|
|
"rewards/frontier_coverage_10": 0.14430496394634246,
|
|
"rewards/frontier_coverage_15": 0.13622619807720185,
|
|
"rewards/frontier_coverage_20": 0.0965993657708168,
|
|
"rewards/frontier_coverage_25": 0.05859274864196777,
|
|
"rewards/frontier_coverage_5": 0.14456866830587387,
|
|
"rewards/frontier_ece_reward": 0.002746673859655857,
|
|
"rewards/frontier_entropy_batch_reward": -0.20547258853912354,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.081915283203125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.166015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11158370226621628,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0409576416015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0409576416015625,
|
|
"signal/advantage_abs_mean": 0.06546520814299583,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06546520814299583,
|
|
"signal/advantage_pre_scale_std": 0.10177824050188064,
|
|
"signal/advantage_std": 0.10177824050188064,
|
|
"signal/brier_reward/centered_abs_mean": 0.10417567193508148,
|
|
"signal/brier_reward/group_bin_occupancy": 0.845703125,
|
|
"signal/brier_reward/group_std_mean": 0.13512639403343202,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013021958991885185,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013021958991885185,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020107618300244214,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72890625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003275436395779252,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.599263691285159e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.599263691285159e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14825652539730072,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19205498099327087,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002653791708871722,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002653791708871722,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14757258892059327,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19117499589920045,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002641549287363887,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002641549287363887,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13492438793182374,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1748009592294693,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024151464458554983,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024151464458554983,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0868727594614029,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88203125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11287190318107605,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015550222946330906,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015550222946330906,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05342138335108757,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.926171875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06843771934509277,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009562427527271211,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009562427527271211,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1482018768787384,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1919853150844574,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026528135407716037,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026528135407716037,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004641291126608849,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.895703125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006130393128842115,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005801613908261061,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005801613908261061,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2680731534957886,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72890625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34347763657569885,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03350914418697357,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03350914418697357,
|
|
"step": 240
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3410998255186805,
|
|
"calibration/batch_distribution_entropy": 0.98266861992294,
|
|
"calibration/batch_entropy_100bins": 0.9725637560170315,
|
|
"calibration/batch_entropy_10bins": 0.98266861992294,
|
|
"calibration/batch_entropy_50bins": 0.9805054265093982,
|
|
"calibration/batch_uniqueness": 0.9535858154296875,
|
|
"calibration/buffer_distribution_entropy": 0.9991640665932143,
|
|
"calibration/buffer_entropy_100bins": 0.9990458872496693,
|
|
"calibration/buffer_entropy_10bins": 0.9991640665932143,
|
|
"calibration/buffer_entropy_50bins": 0.9991495337630607,
|
|
"calibration/confidence_entropy": 0.47953550982848137,
|
|
"calibration/coverage@0%": 0.02890625,
|
|
"calibration/coverage@1%": 0.04296875,
|
|
"calibration/coverage@10%": 0.2109375,
|
|
"calibration/coverage@15%": 0.2875,
|
|
"calibration/coverage@20%": 0.32265625,
|
|
"calibration/coverage@25%": 0.34765625,
|
|
"calibration/coverage@30%": 0.365625,
|
|
"calibration/coverage@5%": 0.098828125,
|
|
"calibration/ece": 0.16965724304332572,
|
|
"calibration/mean_confidence": 0.4929179841997679,
|
|
"calibration/prompt_uniqueness": 0.83154296875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 863.4,
|
|
"completions/max_terminated_length": 431.8,
|
|
"completions/mean_length": 197.94599609375,
|
|
"completions/mean_terminated_length": 197.5545166015625,
|
|
"completions/min_length": 103.2,
|
|
"completions/min_terminated_length": 103.2,
|
|
"epoch": 0.784,
|
|
"grad_norm": 0.0008992942166514695,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 827296568.0,
|
|
"reward": 0.860960865020752,
|
|
"reward_std": 0.08637812584638596,
|
|
"rewards/accuracy_reward": 0.55595703125,
|
|
"rewards/brier_reward": 0.7818328976631165,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.002828824752941728,
|
|
"rewards/frontier_coverage_1": 0.08855738416314125,
|
|
"rewards/frontier_coverage_10": 0.08812275156378746,
|
|
"rewards/frontier_coverage_15": 0.08236327841877937,
|
|
"rewards/frontier_coverage_20": 0.059821216762065886,
|
|
"rewards/frontier_coverage_25": 0.043670283257961275,
|
|
"rewards/frontier_coverage_5": 0.08851732909679413,
|
|
"rewards/frontier_ece_reward": 0.0014453153213253244,
|
|
"rewards/frontier_entropy_batch_reward": -0.18243320286273956,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.097711181640625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.169140625,
|
|
"signal/accuracy_reward/group_std_mean": 0.12649333626031875,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0488555908203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0488555908203125,
|
|
"signal/advantage_abs_mean": 0.06811224520206452,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06811224520206452,
|
|
"signal/advantage_pre_scale_std": 0.10434643775224686,
|
|
"signal/advantage_std": 0.10434643775224686,
|
|
"signal/brier_reward/centered_abs_mean": 0.11582219302654266,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84453125,
|
|
"signal/brier_reward/group_std_mean": 0.14920888543128968,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014477774128317833,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014477774128317833,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00234157289378345,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.741796875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00370767368003726,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.19141557358671e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.19141557358671e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1641725480556488,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.85234375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21050458252429963,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002938688499853015,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002938688499853015,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16328320205211638,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.851953125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20939250588417052,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029227692633867265,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029227692633867265,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1482144132256508,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.848828125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1904875546693802,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002653037803247571,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002653037803247571,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09080570191144943,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11784365773200989,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016254220623522996,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016254220623522996,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05480258762836456,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.91015625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07086438089609146,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000980966305360198,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000980966305360198,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16410693824291228,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8515625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21042270064353943,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029375139623880387,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029375139623880387,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004935114085674286,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.89140625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006519688945263624,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006168892607092858,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006168892607092858,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24725628197193145,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.737109375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3199459671974182,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03090703524649143,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03090703524649143,
|
|
"step": 245
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1940784264652428,
|
|
"calibration/batch_distribution_entropy": 0.9770675388500825,
|
|
"calibration/batch_entropy_100bins": 0.9662281299714202,
|
|
"calibration/batch_entropy_10bins": 0.9770675388500825,
|
|
"calibration/batch_entropy_50bins": 0.9742705536997672,
|
|
"calibration/batch_uniqueness": 0.9523101806640625,
|
|
"calibration/buffer_distribution_entropy": 0.999172449019247,
|
|
"calibration/buffer_entropy_100bins": 0.9990648204449872,
|
|
"calibration/buffer_entropy_10bins": 0.999172449019247,
|
|
"calibration/buffer_entropy_50bins": 0.9991687135590434,
|
|
"calibration/confidence_entropy": 0.4846538428188453,
|
|
"calibration/coverage@0%": 0.0546875,
|
|
"calibration/coverage@1%": 0.0546875,
|
|
"calibration/coverage@10%": 0.346484375,
|
|
"calibration/coverage@15%": 0.489453125,
|
|
"calibration/coverage@20%": 0.575,
|
|
"calibration/coverage@25%": 0.65234375,
|
|
"calibration/coverage@30%": 0.7375,
|
|
"calibration/coverage@5%": 0.1671875,
|
|
"calibration/ece": 0.10917596226614659,
|
|
"calibration/mean_confidence": 0.4966383387592807,
|
|
"calibration/prompt_uniqueness": 0.8314453125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 653.8,
|
|
"completions/max_terminated_length": 424.2,
|
|
"completions/mean_length": 195.6087890625,
|
|
"completions/mean_terminated_length": 195.47777709960937,
|
|
"completions/min_length": 104.2,
|
|
"completions/min_terminated_length": 104.2,
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.0010258163092657924,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 844310162.0,
|
|
"reward": 0.8748032927513123,
|
|
"reward_std": 0.08496512919664383,
|
|
"rewards/accuracy_reward": 0.58251953125,
|
|
"rewards/brier_reward": 0.8144230008125305,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002836526231840253,
|
|
"rewards/frontier_coverage_1": 0.10128066837787628,
|
|
"rewards/frontier_coverage_10": 0.10073070526123047,
|
|
"rewards/frontier_coverage_15": 0.09388678222894668,
|
|
"rewards/frontier_coverage_20": 0.06677651032805443,
|
|
"rewards/frontier_coverage_25": 0.05463530197739601,
|
|
"rewards/frontier_coverage_5": 0.10115833282470703,
|
|
"rewards/frontier_ece_reward": 0.002541623217985034,
|
|
"rewards/frontier_entropy_batch_reward": -0.2220643639564514,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.084588623046875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16484375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11226904094219207,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0422943115234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0422943115234375,
|
|
"signal/advantage_abs_mean": 0.06675532534718513,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06675532534718513,
|
|
"signal/advantage_pre_scale_std": 0.10448435842990875,
|
|
"signal/advantage_std": 0.10448435842990875,
|
|
"signal/brier_reward/centered_abs_mean": 0.10213624089956283,
|
|
"signal/brier_reward/group_bin_occupancy": 0.83828125,
|
|
"signal/brier_reward/group_std_mean": 0.1328400731086731,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012767030112445354,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012767030112445354,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002544344821944833,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.728125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003990656137466431,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.554377155727707e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.554377155727707e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1359323427081108,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.841796875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17886653840541838,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002433188818395138,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002433188818395138,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1348055586218834,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.841796875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17739444077014924,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024130194447934627,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024130194447934627,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1202843114733696,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.83515625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15877383649349214,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021530891302973033,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021530891302973033,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0718239963054657,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.85625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09572341293096542,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012856494868174195,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012856494868174195,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.051678837090730664,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.923046875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06689032912254333,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009250511298887432,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009250511298887432,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13572666347026824,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.842578125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17859258353710175,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024295071605592968,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024295071605592968,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005082287080585957,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006694659031927586,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006352858850732446,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006352858850732446,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2741507351398468,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73828125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34453503489494325,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03426884189248085,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03426884189248085,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"eval_calibration/aurc": 0.4024781794204561,
|
|
"eval_calibration/batch_distribution_entropy": 0.9411803508728845,
|
|
"eval_calibration/batch_entropy_100bins": 0.722578085023462,
|
|
"eval_calibration/batch_entropy_10bins": 0.9411803508728845,
|
|
"eval_calibration/batch_entropy_50bins": 0.798684377132812,
|
|
"eval_calibration/batch_uniqueness": 0.8974609375,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9992212146130883,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9990995964084779,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9992212146130883,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9992052314450008,
|
|
"eval_calibration/confidence_entropy": 0.4720443419968122,
|
|
"eval_calibration/coverage@0%": 0.0703125,
|
|
"eval_calibration/coverage@1%": 0.0703125,
|
|
"eval_calibration/coverage@10%": 0.0703125,
|
|
"eval_calibration/coverage@15%": 0.203125,
|
|
"eval_calibration/coverage@20%": 0.2421875,
|
|
"eval_calibration/coverage@25%": 0.3125,
|
|
"eval_calibration/coverage@30%": 0.34375,
|
|
"eval_calibration/coverage@5%": 0.0703125,
|
|
"eval_calibration/ece": 0.17127804387863715,
|
|
"eval_calibration/mean_confidence": 0.4861035242434224,
|
|
"eval_calibration/prompt_uniqueness": 0.8974609375,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 359.75,
|
|
"eval_completions/max_terminated_length": 359.75,
|
|
"eval_completions/mean_length": 197.09428787231445,
|
|
"eval_completions/mean_terminated_length": 197.09428787231445,
|
|
"eval_completions/min_length": 126.0,
|
|
"eval_completions/min_terminated_length": 126.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 844310162.0,
|
|
"eval_reward": 0.7075212150812149,
|
|
"eval_reward_std": 0.23280686885118484,
|
|
"eval_rewards/accuracy_reward": 0.4296875,
|
|
"eval_rewards/brier_reward": 0.8027084320783615,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0038928079302422702,
|
|
"eval_rewards/frontier_coverage_1": 0.200554970651865,
|
|
"eval_rewards/frontier_coverage_10": 0.19824624806642532,
|
|
"eval_rewards/frontier_coverage_15": 0.17718525603413582,
|
|
"eval_rewards/frontier_coverage_20": 0.10994750820100307,
|
|
"eval_rewards/frontier_coverage_25": 0.06260389927774668,
|
|
"eval_rewards/frontier_coverage_5": 0.19991321116685867,
|
|
"eval_rewards/frontier_ece_reward": 0.003450465912465006,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 19.3003,
|
|
"eval_samples_per_second": 25.906,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4765625,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49562519043684006,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23828125,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23828125,
|
|
"eval_signal/advantage_abs_mean": 0.21785810217261314,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21785810217261314,
|
|
"eval_signal/advantage_pre_scale_std": 0.23014385625720024,
|
|
"eval_signal/advantage_std": 0.23014385625720024,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.1881571188569069,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.875,
|
|
"eval_signal/brier_reward/group_std_mean": 0.23912940546870232,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02351963985711336,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02351963985711336,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005338445422239602,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6328125,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.009567599976435304,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.555817086948082e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.555817086948082e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.348846860229969,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.42089004069566727,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006244358723051846,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006244358723051846,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3454489931464195,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4168899804353714,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006183536606840789,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006183536606840789,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.30945945531129837,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.37479550391435623,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005539324251003563,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005539324251003563,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.1743907555937767,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.2171802930533886,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031215944909490645,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031215944909490645,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0898975171148777,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9296875,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.11114342510700226,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016091655124910176,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016091655124910176,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.34793129563331604,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.41981156170368195,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006227970006875694,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006227970006875694,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.006496628629975021,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.008258524350821972,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008120785787468776,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008120785787468776,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.207,
|
|
"step": 250
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20885883826229318,
|
|
"calibration/batch_distribution_entropy": 0.9617354323768474,
|
|
"calibration/batch_entropy_100bins": 0.9607130781173305,
|
|
"calibration/batch_entropy_10bins": 0.9617354323768474,
|
|
"calibration/batch_entropy_50bins": 0.9659121941892688,
|
|
"calibration/batch_uniqueness": 0.9494836297596694,
|
|
"calibration/buffer_distribution_entropy": 0.999124191511771,
|
|
"calibration/buffer_entropy_100bins": 0.999041988564324,
|
|
"calibration/buffer_entropy_10bins": 0.999124191511771,
|
|
"calibration/buffer_entropy_50bins": 0.9991497628306819,
|
|
"calibration/confidence_entropy": 0.46736913978754613,
|
|
"calibration/coverage@0%": 0.043359375,
|
|
"calibration/coverage@1%": 0.043359375,
|
|
"calibration/coverage@10%": 0.171484375,
|
|
"calibration/coverage@15%": 0.28758408757338555,
|
|
"calibration/coverage@20%": 0.557930987035225,
|
|
"calibration/coverage@25%": 0.7530630809686889,
|
|
"calibration/coverage@30%": 0.8226279659980431,
|
|
"calibration/coverage@5%": 0.102734375,
|
|
"calibration/ece": 0.14049068595229333,
|
|
"calibration/mean_confidence": 0.523476685252264,
|
|
"calibration/prompt_uniqueness": 0.8333644458084027,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 467.6,
|
|
"completions/max_terminated_length": 467.6,
|
|
"completions/mean_length": 194.86865234375,
|
|
"completions/mean_terminated_length": 194.86865234375,
|
|
"completions/min_length": 103.8,
|
|
"completions/min_terminated_length": 103.8,
|
|
"epoch": 0.816,
|
|
"grad_norm": 0.001114765414968133,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 861404785.0,
|
|
"reward": 0.8725868582725524,
|
|
"reward_std": 0.08807137310504913,
|
|
"rewards/accuracy_reward": 0.5794921875,
|
|
"rewards/brier_reward": 0.8018252968788147,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002854443807154894,
|
|
"rewards/frontier_coverage_1": 0.08887268304824829,
|
|
"rewards/frontier_coverage_10": 0.08834582418203354,
|
|
"rewards/frontier_coverage_15": 0.08466917127370835,
|
|
"rewards/frontier_coverage_20": 0.060265733301639555,
|
|
"rewards/frontier_coverage_25": 0.051320061832666394,
|
|
"rewards/frontier_coverage_5": 0.08884882032871247,
|
|
"rewards/frontier_ece_reward": 0.0020072998944669963,
|
|
"rewards/frontier_entropy_batch_reward": -0.2065118134021759,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09168701171875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.170703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12381611913442611,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.634375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045843505859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045843505859375,
|
|
"signal/advantage_abs_mean": 0.06842098832130432,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06842098832130432,
|
|
"signal/advantage_pre_scale_std": 0.10695004910230636,
|
|
"signal/advantage_std": 0.10695004910230636,
|
|
"signal/brier_reward/centered_abs_mean": 0.10891520380973815,
|
|
"signal/brier_reward/group_bin_occupancy": 0.833203125,
|
|
"signal/brier_reward/group_std_mean": 0.14026750177145003,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01361440047621727,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01361440047621727,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002719699405133724,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00452100308611989,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8682617489248514e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8682617489248514e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14558494091033936,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.857421875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18775410056114197,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026059703435748816,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026059703435748816,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14347892701625825,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.858203125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1850330114364624,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025682727340608836,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025682727340608836,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12778309732675552,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.85,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16491487622261047,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002287317393347621,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002287317393347621,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07579994648694992,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.884375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09820334166288376,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013568190392106772,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013568190392106772,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05239018201828003,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.92109375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06699474751949311,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009377842419780791,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009377842419780791,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14545360803604127,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.857421875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18758580982685089,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002603619499132037,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002603619499132037,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004751469660550356,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.9015625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006205685343593359,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005939337075687945,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005939337075687945,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.265206840634346,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.730078125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33529953956604003,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03315085507929325,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03315085507929325,
|
|
"step": 255
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26691739801367803,
|
|
"calibration/batch_distribution_entropy": 0.9774445841742269,
|
|
"calibration/batch_entropy_100bins": 0.9696266513555238,
|
|
"calibration/batch_entropy_10bins": 0.9774445841742269,
|
|
"calibration/batch_entropy_50bins": 0.9781202652767531,
|
|
"calibration/batch_uniqueness": 0.9535858154296875,
|
|
"calibration/buffer_distribution_entropy": 0.9989725991598203,
|
|
"calibration/buffer_entropy_100bins": 0.9989539238306406,
|
|
"calibration/buffer_entropy_10bins": 0.9989725991598203,
|
|
"calibration/buffer_entropy_50bins": 0.9990613562739397,
|
|
"calibration/confidence_entropy": 0.4988719882240712,
|
|
"calibration/coverage@0%": 0.03828125,
|
|
"calibration/coverage@1%": 0.03828125,
|
|
"calibration/coverage@10%": 0.25078125,
|
|
"calibration/coverage@15%": 0.294921875,
|
|
"calibration/coverage@20%": 0.372265625,
|
|
"calibration/coverage@25%": 0.455859375,
|
|
"calibration/coverage@30%": 0.5828125,
|
|
"calibration/coverage@5%": 0.15078125,
|
|
"calibration/ece": 0.10967759805603246,
|
|
"calibration/mean_confidence": 0.48722149702527523,
|
|
"calibration/prompt_uniqueness": 0.846044921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 442.4,
|
|
"completions/max_terminated_length": 442.4,
|
|
"completions/mean_length": 196.14306640625,
|
|
"completions/mean_terminated_length": 196.14306640625,
|
|
"completions/min_length": 103.2,
|
|
"completions/min_terminated_length": 103.2,
|
|
"epoch": 0.832,
|
|
"grad_norm": 0.0010814516572281718,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 878421642.0,
|
|
"reward": 0.8648443460464478,
|
|
"reward_std": 0.08550989478826523,
|
|
"rewards/accuracy_reward": 0.55498046875,
|
|
"rewards/brier_reward": 0.8163813591003418,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0023734794463962316,
|
|
"rewards/frontier_coverage_1": 0.11868036091327668,
|
|
"rewards/frontier_coverage_10": 0.11617294400930404,
|
|
"rewards/frontier_coverage_15": 0.10371433347463607,
|
|
"rewards/frontier_coverage_20": 0.07033977434039115,
|
|
"rewards/frontier_coverage_25": 0.0553276963531971,
|
|
"rewards/frontier_coverage_5": 0.11858219057321548,
|
|
"rewards/frontier_ece_reward": 0.0023112162714824082,
|
|
"rewards/frontier_entropy_batch_reward": -0.20258863270282745,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.090997314453125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.169140625,
|
|
"signal/accuracy_reward/group_std_mean": 0.12111333757638931,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0454986572265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0454986572265625,
|
|
"signal/advantage_abs_mean": 0.06704051047563553,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06704051047563553,
|
|
"signal/advantage_pre_scale_std": 0.10539929419755936,
|
|
"signal/advantage_std": 0.10539929419755936,
|
|
"signal/brier_reward/centered_abs_mean": 0.10057551860809326,
|
|
"signal/brier_reward/group_bin_occupancy": 0.841796875,
|
|
"signal/brier_reward/group_std_mean": 0.13067585229873657,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012571939826011657,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012571939826011657,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020601370837539435,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.734765625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032557172700762747,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.687645366881043e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.687645366881043e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14384538531303406,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.858984375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18559444546699524,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025748323649168016,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025748323649168016,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14038788378238679,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.85859375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18115375339984893,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025129430461674927,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025129430461674927,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12292735427618026,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.855859375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15857858061790467,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022003995720297096,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022003995720297096,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07247701585292816,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.891015625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09377783834934235,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012973385397344827,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012973385397344827,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05139257907867432,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.932421875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06542427986860275,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000919927132781595,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000919927132781595,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14371106922626495,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.85859375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1854223281145096,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025724280625581742,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025724280625581742,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004667305201292038,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.894921875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006094491388648748,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005834131501615047,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005834131501615047,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2624901086091995,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7328125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3327677011489868,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03281126357614994,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03281126357614994,
|
|
"step": 260
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31542229607064637,
|
|
"calibration/batch_distribution_entropy": 0.9739707310173376,
|
|
"calibration/batch_entropy_100bins": 0.9635504948025592,
|
|
"calibration/batch_entropy_10bins": 0.9739707310173376,
|
|
"calibration/batch_entropy_50bins": 0.9730236720183898,
|
|
"calibration/batch_uniqueness": 0.951666259765625,
|
|
"calibration/buffer_distribution_entropy": 0.9989930255604248,
|
|
"calibration/buffer_entropy_100bins": 0.9989505577332609,
|
|
"calibration/buffer_entropy_10bins": 0.9989930255604248,
|
|
"calibration/buffer_entropy_50bins": 0.999068960960001,
|
|
"calibration/confidence_entropy": 0.5086728375442844,
|
|
"calibration/coverage@0%": 0.0296875,
|
|
"calibration/coverage@1%": 0.0296875,
|
|
"calibration/coverage@10%": 0.120703125,
|
|
"calibration/coverage@15%": 0.2734375,
|
|
"calibration/coverage@20%": 0.417578125,
|
|
"calibration/coverage@25%": 0.491796875,
|
|
"calibration/coverage@30%": 0.5453125,
|
|
"calibration/coverage@5%": 0.102734375,
|
|
"calibration/ece": 0.1579181461275594,
|
|
"calibration/mean_confidence": 0.5313260932295083,
|
|
"calibration/prompt_uniqueness": 0.837255859375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 421.6,
|
|
"completions/max_terminated_length": 421.6,
|
|
"completions/mean_length": 194.3615234375,
|
|
"completions/mean_terminated_length": 194.3615234375,
|
|
"completions/min_length": 101.4,
|
|
"completions/min_terminated_length": 101.4,
|
|
"epoch": 0.848,
|
|
"grad_norm": 0.0008651363314129412,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0002,
|
|
"num_tokens": 895426272.0,
|
|
"reward": 0.8523078799247742,
|
|
"reward_std": 0.08377386629581451,
|
|
"rewards/accuracy_reward": 0.53427734375,
|
|
"rewards/brier_reward": 0.8082629799842834,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002553269104100764,
|
|
"rewards/frontier_coverage_1": 0.11806271076202393,
|
|
"rewards/frontier_coverage_10": 0.11662331819534302,
|
|
"rewards/frontier_coverage_15": 0.10539236664772034,
|
|
"rewards/frontier_coverage_20": 0.06768357157707214,
|
|
"rewards/frontier_coverage_25": 0.04773269593715668,
|
|
"rewards/frontier_coverage_5": 0.1179862841963768,
|
|
"rewards/frontier_ece_reward": 0.0020762649830430744,
|
|
"rewards/frontier_entropy_batch_reward": -0.21074254512786866,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.076470947265625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.165625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1066226527094841,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.675,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0382354736328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0382354736328125,
|
|
"signal/advantage_abs_mean": 0.06452079713344575,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06452079713344575,
|
|
"signal/advantage_pre_scale_std": 0.10078646242618561,
|
|
"signal/advantage_std": 0.10078646242618561,
|
|
"signal/brier_reward/centered_abs_mean": 0.095879465341568,
|
|
"signal/brier_reward/group_bin_occupancy": 0.86015625,
|
|
"signal/brier_reward/group_std_mean": 0.12415737211704254,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011984933167696,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011984933167696,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002056886232458055,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73671875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032547391252592205,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.681826237880159e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.681826237880159e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13201110661029816,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1731318861246109,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023629988078027963,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023629988078027963,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12971103489398955,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.872265625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17014427483081818,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00232182745821774,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00232182745821774,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11497683823108673,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15072887837886811,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002058085426688194,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002058085426688194,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06714669689536094,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08797992616891862,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001201925822533667,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001201925822533667,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04629442393779755,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.921484375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.059931250661611556,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008286701398901641,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008286701398901641,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1318788543343544,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17296003997325898,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023606313858181237,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023606313858181237,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004494541138410568,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.894140625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0058809550479054454,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000561817642301321,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000561817642301321,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.279247921705246,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741015625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3494983911514282,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03490599021315575,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03490599021315575,
|
|
"step": 265
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2551935935751549,
|
|
"calibration/batch_distribution_entropy": 0.9551683899262354,
|
|
"calibration/batch_entropy_100bins": 0.9561740490407103,
|
|
"calibration/batch_entropy_10bins": 0.9551683899262354,
|
|
"calibration/batch_entropy_50bins": 0.9628317632115422,
|
|
"calibration/batch_uniqueness": 0.9490325927734375,
|
|
"calibration/buffer_distribution_entropy": 0.9990927517675319,
|
|
"calibration/buffer_entropy_100bins": 0.9990242562445925,
|
|
"calibration/buffer_entropy_10bins": 0.9990927517675319,
|
|
"calibration/buffer_entropy_50bins": 0.9991437917105215,
|
|
"calibration/confidence_entropy": 0.49864896977334033,
|
|
"calibration/coverage@0%": 0.034375,
|
|
"calibration/coverage@1%": 0.034375,
|
|
"calibration/coverage@10%": 0.194921875,
|
|
"calibration/coverage@15%": 0.2421875,
|
|
"calibration/coverage@20%": 0.34765625,
|
|
"calibration/coverage@25%": 0.444140625,
|
|
"calibration/coverage@30%": 0.575,
|
|
"calibration/coverage@5%": 0.108203125,
|
|
"calibration/ece": 0.12198031278700468,
|
|
"calibration/mean_confidence": 0.5995220320808029,
|
|
"calibration/prompt_uniqueness": 0.8458984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 647.8,
|
|
"completions/max_terminated_length": 428.6,
|
|
"completions/mean_length": 196.71005859375,
|
|
"completions/mean_terminated_length": 196.57922973632813,
|
|
"completions/min_length": 105.8,
|
|
"completions/min_terminated_length": 105.8,
|
|
"epoch": 0.864,
|
|
"grad_norm": 0.0012777691008523107,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 912427399.0,
|
|
"reward": 0.8713708758354187,
|
|
"reward_std": 0.09100723564624787,
|
|
"rewards/accuracy_reward": 0.587109375,
|
|
"rewards/brier_reward": 0.8016261577606201,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0026119566056877373,
|
|
"rewards/frontier_coverage_1": 0.07935620397329331,
|
|
"rewards/frontier_coverage_10": 0.07880553454160691,
|
|
"rewards/frontier_coverage_15": 0.07368464544415473,
|
|
"rewards/frontier_coverage_20": 0.05397990569472313,
|
|
"rewards/frontier_coverage_25": 0.04986085593700409,
|
|
"rewards/frontier_coverage_5": 0.07933037877082824,
|
|
"rewards/frontier_ece_reward": 0.0016980181448161603,
|
|
"rewards/frontier_entropy_batch_reward": -0.23946044743061065,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0991943359375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12835633456707002,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.64375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04959716796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04959716796875,
|
|
"signal/advantage_abs_mean": 0.07214201688766479,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07214201688766479,
|
|
"signal/advantage_pre_scale_std": 0.10875225216150283,
|
|
"signal/advantage_std": 0.10875225216150283,
|
|
"signal/brier_reward/centered_abs_mean": 0.10874636620283126,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8484375,
|
|
"signal/brier_reward/group_std_mean": 0.13965383768081666,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013593295775353908,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013593295775353908,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023859881330281496,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73984375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003907088562846184,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.270918434485793e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.270918434485793e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15109747648239136,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19283765852451323,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00270464476197958,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00270464476197958,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.149289670586586,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86171875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19051893651485444,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002672284934669733,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002672284934669733,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13000792711973191,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.851953125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16591603457927703,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023271418176591396,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023271418176591396,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07444732487201691,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.896875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09547023475170135,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013326070504263044,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013326070504263044,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05278872922062874,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.923046875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06748096346855163,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009449182078242302,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009449182078242302,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15094164311885833,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19263845980167388,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002701855357736349,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002701855357736349,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004699286818504333,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.900390625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006102659367024898,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005874108523130417,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005874108523130417,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29436487555503843,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.726171875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3652419447898865,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.036795609444379804,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.036795609444379804,
|
|
"step": 270
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3689968283562816,
|
|
"calibration/batch_distribution_entropy": 0.9816647884171952,
|
|
"calibration/batch_entropy_100bins": 0.9690518445136315,
|
|
"calibration/batch_entropy_10bins": 0.9816647884171952,
|
|
"calibration/batch_entropy_50bins": 0.9786919422206465,
|
|
"calibration/batch_uniqueness": 0.9536102294921875,
|
|
"calibration/buffer_distribution_entropy": 0.9991432971050402,
|
|
"calibration/buffer_entropy_100bins": 0.9990850763393919,
|
|
"calibration/buffer_entropy_10bins": 0.9991432971050402,
|
|
"calibration/buffer_entropy_50bins": 0.9991792049124377,
|
|
"calibration/confidence_entropy": 0.47520039428209443,
|
|
"calibration/coverage@0%": 0.005859375,
|
|
"calibration/coverage@1%": 0.005859375,
|
|
"calibration/coverage@10%": 0.0296875,
|
|
"calibration/coverage@15%": 0.096484375,
|
|
"calibration/coverage@20%": 0.146875,
|
|
"calibration/coverage@25%": 0.214453125,
|
|
"calibration/coverage@30%": 0.278515625,
|
|
"calibration/coverage@5%": 0.02265625,
|
|
"calibration/ece": 0.1265384011632356,
|
|
"calibration/mean_confidence": 0.5028521730859784,
|
|
"calibration/prompt_uniqueness": 0.831884765625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 452.8,
|
|
"completions/max_terminated_length": 452.8,
|
|
"completions/mean_length": 191.644921875,
|
|
"completions/mean_terminated_length": 191.644921875,
|
|
"completions/min_length": 97.6,
|
|
"completions/min_terminated_length": 97.6,
|
|
"epoch": 0.88,
|
|
"grad_norm": 0.0009742515976540744,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 929536915.0,
|
|
"reward": 0.8399426817893982,
|
|
"reward_std": 0.09036057144403457,
|
|
"rewards/accuracy_reward": 0.51142578125,
|
|
"rewards/brier_reward": 0.7988796353340148,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.003130771638825536,
|
|
"rewards/frontier_coverage_1": 0.13325000703334808,
|
|
"rewards/frontier_coverage_10": 0.13197922110557556,
|
|
"rewards/frontier_coverage_15": 0.11483763456344605,
|
|
"rewards/frontier_coverage_20": 0.07162886634469032,
|
|
"rewards/frontier_coverage_25": 0.05139811635017395,
|
|
"rewards/frontier_coverage_5": 0.13309186547994614,
|
|
"rewards/frontier_ece_reward": 0.0023863946786150335,
|
|
"rewards/frontier_entropy_batch_reward": -0.21808099746704102,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.097894287109375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.169921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1274777978658676,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0489471435546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0489471435546875,
|
|
"signal/advantage_abs_mean": 0.0720147468149662,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0720147468149662,
|
|
"signal/advantage_pre_scale_std": 0.11001690626144409,
|
|
"signal/advantage_std": 0.11001690626144409,
|
|
"signal/brier_reward/centered_abs_mean": 0.11212355941534043,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84609375,
|
|
"signal/brier_reward/group_std_mean": 0.14494499266147615,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014015444926917553,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014015444926917553,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028623202815651894,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.706640625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004754068516194821,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.123553055454977e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.123553055454977e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15715896785259248,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20266908705234526,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002813145564869046,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002813145564869046,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15548037588596345,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20054614543914795,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027830985840409995,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027830985840409995,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13485134840011598,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17428669035434724,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002413839101791382,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002413839101791382,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0783051684498787,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9046875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10148594677448272,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014016624772921205,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014016624772921205,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0531325563788414,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.930859375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06878565400838851,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009510727250017226,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009510727250017226,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15696605443954467,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20242418348789215,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002809692220762372,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002809692220762372,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004732540622353554,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.903125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006143409106880426,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005915675777941942,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005915675777941942,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28106330037117006,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.722265625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35157610177993776,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03513291254639626,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03513291254639626,
|
|
"step": 275
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3373058154661964,
|
|
"calibration/batch_distribution_entropy": 0.9826539498416601,
|
|
"calibration/batch_entropy_100bins": 0.9682695002978061,
|
|
"calibration/batch_entropy_10bins": 0.9826539498416601,
|
|
"calibration/batch_entropy_50bins": 0.9762102980123724,
|
|
"calibration/batch_uniqueness": 0.953095282009279,
|
|
"calibration/buffer_distribution_entropy": 0.999126223691265,
|
|
"calibration/buffer_entropy_100bins": 0.9990657505371372,
|
|
"calibration/buffer_entropy_10bins": 0.999126223691265,
|
|
"calibration/buffer_entropy_50bins": 0.9991405062324873,
|
|
"calibration/confidence_entropy": 0.48298531010661866,
|
|
"calibration/coverage@0%": 0.016410072162426615,
|
|
"calibration/coverage@1%": 0.016410072162426615,
|
|
"calibration/coverage@10%": 0.0601661876223092,
|
|
"calibration/coverage@15%": 0.08399660591976517,
|
|
"calibration/coverage@20%": 0.3101944716242661,
|
|
"calibration/coverage@25%": 0.40160989481409004,
|
|
"calibration/coverage@30%": 0.4906815680039139,
|
|
"calibration/coverage@5%": 0.040628822162426616,
|
|
"calibration/ece": 0.14296123700914443,
|
|
"calibration/mean_confidence": 0.4870876866229802,
|
|
"calibration/prompt_uniqueness": 0.8266549736602498,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1082.6,
|
|
"completions/max_terminated_length": 495.4,
|
|
"completions/mean_length": 190.72861328125,
|
|
"completions/mean_terminated_length": 190.20317687988282,
|
|
"completions/min_length": 97.4,
|
|
"completions/min_terminated_length": 97.4,
|
|
"epoch": 0.896,
|
|
"grad_norm": 0.0011140021961182356,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 946600824.0,
|
|
"reward": 0.8575990200042725,
|
|
"reward_std": 0.08079204559326172,
|
|
"rewards/accuracy_reward": 0.54970703125,
|
|
"rewards/brier_reward": 0.7908406734466553,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0027651153039187195,
|
|
"rewards/frontier_coverage_1": 0.10620979815721512,
|
|
"rewards/frontier_coverage_10": 0.10566670447587967,
|
|
"rewards/frontier_coverage_15": 0.09410437047481537,
|
|
"rewards/frontier_coverage_20": 0.06202979385852814,
|
|
"rewards/frontier_coverage_25": 0.04811366051435471,
|
|
"rewards/frontier_coverage_5": 0.10607990473508835,
|
|
"rewards/frontier_ece_reward": 0.0011842235224321484,
|
|
"rewards/frontier_entropy_batch_reward": -0.20288202166557312,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.081243896484375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11124018728733062,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0406219482421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0406219482421875,
|
|
"signal/advantage_abs_mean": 0.061866439133882525,
|
|
"signal/advantage_pre_scale_abs_mean": 0.061866439133882525,
|
|
"signal/advantage_pre_scale_std": 0.0962506964802742,
|
|
"signal/advantage_std": 0.0962506964802742,
|
|
"signal/brier_reward/centered_abs_mean": 0.11059802174568176,
|
|
"signal/brier_reward/group_bin_occupancy": 0.858984375,
|
|
"signal/brier_reward/group_std_mean": 0.14146918654441834,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01382475271821022,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01382475271821022,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_bin_occupancy": 0.1265625,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002222577598877251,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73046875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003623427450656891,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.978413733420894e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.978413733420894e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15609081983566284,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20036340057849883,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027940256986767054,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027940256986767054,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15488055050373079,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.864453125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19879828989505768,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027723620180040596,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027723620180040596,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13422557562589646,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.857421875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17230915725231172,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024026377592235803,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024026377592235803,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07715532034635544,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09921250641345977,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013810801785439253,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013810801785439253,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05220200940966606,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.918359375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06679405272006989,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000934415915980935,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000934415915980935,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1558253914117813,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20002435743808747,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027892745565623045,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027892745565623045,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0045457611791789535,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.897265625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005920033343136311,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005682201473973692,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005682201473973692,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2616787314414978,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.728125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3359409987926483,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03270984143018722,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03270984143018722,
|
|
"step": 280
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3549690512751742,
|
|
"calibration/batch_distribution_entropy": 0.977302492713015,
|
|
"calibration/batch_entropy_100bins": 0.9687005838156748,
|
|
"calibration/batch_entropy_10bins": 0.977302492713015,
|
|
"calibration/batch_entropy_50bins": 0.9769308698120451,
|
|
"calibration/batch_uniqueness": 0.9527252197265625,
|
|
"calibration/buffer_distribution_entropy": 0.9990242862387232,
|
|
"calibration/buffer_entropy_100bins": 0.9989872340708235,
|
|
"calibration/buffer_entropy_10bins": 0.9990242862387232,
|
|
"calibration/buffer_entropy_50bins": 0.9990547510383561,
|
|
"calibration/confidence_entropy": 0.49627782732928577,
|
|
"calibration/coverage@0%": 0.01796875,
|
|
"calibration/coverage@1%": 0.01796875,
|
|
"calibration/coverage@10%": 0.061328125,
|
|
"calibration/coverage@15%": 0.096484375,
|
|
"calibration/coverage@20%": 0.25859375,
|
|
"calibration/coverage@25%": 0.393359375,
|
|
"calibration/coverage@30%": 0.455859375,
|
|
"calibration/coverage@5%": 0.0484375,
|
|
"calibration/ece": 0.15157571405774412,
|
|
"calibration/mean_confidence": 0.49334667608446414,
|
|
"calibration/prompt_uniqueness": 0.845263671875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 865.6,
|
|
"completions/max_terminated_length": 455.2,
|
|
"completions/mean_length": 191.36181640625,
|
|
"completions/mean_terminated_length": 191.0985565185547,
|
|
"completions/min_length": 86.0,
|
|
"completions/min_terminated_length": 86.0,
|
|
"epoch": 0.912,
|
|
"grad_norm": 0.0010114161996170878,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 963611665.0,
|
|
"reward": 0.8577099800109863,
|
|
"reward_std": 0.08399459272623062,
|
|
"rewards/accuracy_reward": 0.54443359375,
|
|
"rewards/brier_reward": 0.8012910604476928,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0027018039952963592,
|
|
"rewards/frontier_coverage_1": 0.1103449311107397,
|
|
"rewards/frontier_coverage_10": 0.10959461368620396,
|
|
"rewards/frontier_coverage_15": 0.09755977056920528,
|
|
"rewards/frontier_coverage_20": 0.06432019025087357,
|
|
"rewards/frontier_coverage_25": 0.05184435471892357,
|
|
"rewards/frontier_coverage_5": 0.11015897234901786,
|
|
"rewards/frontier_ece_reward": 0.001691946922801435,
|
|
"rewards/frontier_entropy_batch_reward": -0.19574475586414336,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.080206298828125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.167578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11207558661699295,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0401031494140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0401031494140625,
|
|
"signal/advantage_abs_mean": 0.06401625275611877,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06401625275611877,
|
|
"signal/advantage_pre_scale_std": 0.10008785128593445,
|
|
"signal/advantage_std": 0.10008785128593445,
|
|
"signal/brier_reward/centered_abs_mean": 0.11323688179254532,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8328125,
|
|
"signal/brier_reward/group_std_mean": 0.14842240512371063,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014154610224068165,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014154610224068165,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023412939393892885,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.730859375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003946681786328554,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.190915824437979e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.190915824437979e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15513492822647096,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20151489973068237,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027769151609390976,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027769151609390976,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15395722687244415,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.862109375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19999560713768005,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027558341156691314,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027558341156691314,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13296782821416855,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.853515625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17283936440944672,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023801239673048257,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023801239673048257,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0778110533952713,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.889453125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10108065158128739,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013928177999332548,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013928177999332548,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0549514427781105,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9234375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07073460221290588,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000983630819246173,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000983630819246173,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15491481125354767,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20123314261436462,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027729750145226717,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027729750145226717,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004723855573683977,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8921875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006184379477053881,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005904819467104971,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005904819467104971,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2582594394683838,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.738671875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33268279433250425,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032282429933547976,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032282429933547976,
|
|
"step": 285
|
|
},
|
|
{
|
|
"calibration/aurc": 0.41280512387379353,
|
|
"calibration/batch_distribution_entropy": 0.983191233962286,
|
|
"calibration/batch_entropy_100bins": 0.9707893275502993,
|
|
"calibration/batch_entropy_10bins": 0.983191233962286,
|
|
"calibration/batch_entropy_50bins": 0.9796747150760041,
|
|
"calibration/batch_uniqueness": 0.9539581298828125,
|
|
"calibration/buffer_distribution_entropy": 0.9990114243714435,
|
|
"calibration/buffer_entropy_100bins": 0.9990231310977965,
|
|
"calibration/buffer_entropy_10bins": 0.9990114243714435,
|
|
"calibration/buffer_entropy_50bins": 0.9990782041438109,
|
|
"calibration/confidence_entropy": 0.5056221875295627,
|
|
"calibration/coverage@0%": 0.005859375,
|
|
"calibration/coverage@1%": 0.005859375,
|
|
"calibration/coverage@10%": 0.0171875,
|
|
"calibration/coverage@15%": 0.019140625,
|
|
"calibration/coverage@20%": 0.033203125,
|
|
"calibration/coverage@25%": 0.10234375,
|
|
"calibration/coverage@30%": 0.267578125,
|
|
"calibration/coverage@5%": 0.01171875,
|
|
"calibration/ece": 0.13720565678424684,
|
|
"calibration/mean_confidence": 0.5138671169361444,
|
|
"calibration/prompt_uniqueness": 0.854052734375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 698.8,
|
|
"completions/max_terminated_length": 482.6,
|
|
"completions/mean_length": 188.668359375,
|
|
"completions/mean_terminated_length": 188.5367401123047,
|
|
"completions/min_length": 93.4,
|
|
"completions/min_terminated_length": 93.4,
|
|
"epoch": 0.928,
|
|
"grad_norm": 0.0009088640799745917,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 980570445.0,
|
|
"reward": 0.8483018755912781,
|
|
"reward_std": 0.08071554303169251,
|
|
"rewards/accuracy_reward": 0.53251953125,
|
|
"rewards/brier_reward": 0.7901157855987548,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.003404234582558274,
|
|
"rewards/frontier_coverage_1": 0.11180114150047302,
|
|
"rewards/frontier_coverage_10": 0.11092503815889358,
|
|
"rewards/frontier_coverage_15": 0.09873643815517426,
|
|
"rewards/frontier_coverage_20": 0.0643385447561741,
|
|
"rewards/frontier_coverage_25": 0.052167801558971404,
|
|
"rewards/frontier_coverage_5": 0.1116182416677475,
|
|
"rewards/frontier_ece_reward": 0.0019177033798769116,
|
|
"rewards/frontier_entropy_batch_reward": -0.21312889754772185,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.072637939453125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.163671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.10080017894506454,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0363189697265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0363189697265625,
|
|
"signal/advantage_abs_mean": 0.06181541979312897,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06181541979312897,
|
|
"signal/advantage_pre_scale_std": 0.09706850945949555,
|
|
"signal/advantage_std": 0.09706850945949555,
|
|
"signal/brier_reward/centered_abs_mean": 0.111118184030056,
|
|
"signal/brier_reward/group_bin_occupancy": 0.83671875,
|
|
"signal/brier_reward/group_std_mean": 0.14327452182769776,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013889773003757,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013889773003757,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029117835219949484,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.727734375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004748767055571079,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.2120923646725716e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.2120923646725716e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14507719576358796,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.85546875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1872227430343628,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002596881752833724,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002596881752833724,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1439109742641449,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8546875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18574815690517427,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002576006343588233,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002576006343588233,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1262580692768097,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.846484375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16331798434257508,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022600193507969378,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022600193507969378,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0749655857682228,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09689257442951202,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013418839545920492,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013418839545920492,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05532756522297859,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.93046875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07019431442022324,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009903633617796004,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009903633617796004,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14484555274248123,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.85546875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18693141639232635,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002592735271900892,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002592735271900892,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004770417790859937,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.89296875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0062720650807023045,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005963022238574922,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005963022238574922,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27077251076698305,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73046875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34694740176200867,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03384656384587288,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03384656384587288,
|
|
"step": 290
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24858498275267107,
|
|
"calibration/batch_distribution_entropy": 0.9861492452770714,
|
|
"calibration/batch_entropy_100bins": 0.9737597827623423,
|
|
"calibration/batch_entropy_10bins": 0.9861492452770714,
|
|
"calibration/batch_entropy_50bins": 0.9812966268493726,
|
|
"calibration/batch_uniqueness": 0.954254150390625,
|
|
"calibration/buffer_distribution_entropy": 0.9988957247957387,
|
|
"calibration/buffer_entropy_100bins": 0.9989956482917522,
|
|
"calibration/buffer_entropy_10bins": 0.9988957247957387,
|
|
"calibration/buffer_entropy_50bins": 0.9990108307728066,
|
|
"calibration/confidence_entropy": 0.4974394635763413,
|
|
"calibration/coverage@0%": 0.025,
|
|
"calibration/coverage@1%": 0.025,
|
|
"calibration/coverage@10%": 0.169921875,
|
|
"calibration/coverage@15%": 0.289453125,
|
|
"calibration/coverage@20%": 0.42890625,
|
|
"calibration/coverage@25%": 0.5796875,
|
|
"calibration/coverage@30%": 0.6609375,
|
|
"calibration/coverage@5%": 0.09921875,
|
|
"calibration/ece": 0.10266510009719523,
|
|
"calibration/mean_confidence": 0.472734394472595,
|
|
"calibration/prompt_uniqueness": 0.839453125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 446.6,
|
|
"completions/max_terminated_length": 446.6,
|
|
"completions/mean_length": 187.97138671875,
|
|
"completions/mean_terminated_length": 187.97138671875,
|
|
"completions/min_length": 97.2,
|
|
"completions/min_terminated_length": 97.2,
|
|
"epoch": 0.944,
|
|
"grad_norm": 0.0011019782396033406,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 997470696.0,
|
|
"reward": 0.8485186576843262,
|
|
"reward_std": 0.09234340786933899,
|
|
"rewards/accuracy_reward": 0.53505859375,
|
|
"rewards/brier_reward": 0.7864872336387634,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0030688193626701834,
|
|
"rewards/frontier_coverage_1": 0.10997713655233383,
|
|
"rewards/frontier_coverage_10": 0.10910578817129135,
|
|
"rewards/frontier_coverage_15": 0.09824755191802978,
|
|
"rewards/frontier_coverage_20": 0.06621812656521797,
|
|
"rewards/frontier_coverage_25": 0.04666025787591934,
|
|
"rewards/frontier_coverage_5": 0.10978993475437164,
|
|
"rewards/frontier_ece_reward": 0.0015931544359773398,
|
|
"rewards/frontier_entropy_batch_reward": -0.2170539140701294,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.112640380859375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.176171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.14598776698112487,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0563201904296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0563201904296875,
|
|
"signal/advantage_abs_mean": 0.07284359484910966,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07284359484910966,
|
|
"signal/advantage_pre_scale_std": 0.11034233421087265,
|
|
"signal/advantage_std": 0.11034233421087265,
|
|
"signal/brier_reward/centered_abs_mean": 0.11176075041294098,
|
|
"signal/brier_reward/group_bin_occupancy": 0.832421875,
|
|
"signal/brier_reward/group_std_mean": 0.14492084681987763,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013970093801617622,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013970093801617622,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024296872783452273,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.716796875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004007898364216089,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.349140144768171e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.349140144768171e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1726018726825714,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.85234375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22027516961097718,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003089573513716459,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003089573513716459,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17125667929649352,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8515625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21858170330524446,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030654944013804196,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030654944013804196,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1538640648126602,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.846484375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19664142429828643,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027541667222976685,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027541667222976685,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08876172602176666,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11390969753265381,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015888348687440157,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015888348687440157,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.054720057547092436,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.919140625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07047712504863739,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009794890065677464,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009794890065677464,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1723033905029297,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.85234375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21990018784999849,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030842306092381476,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030842306092381476,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005012043006718159,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.90234375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006545277405530214,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006265053758397699,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006265053758397699,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27133584320545195,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.726953125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34131971597671507,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033916980400681494,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033916980400681494,
|
|
"step": 295
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3256529793283661,
|
|
"calibration/batch_distribution_entropy": 0.984952501452715,
|
|
"calibration/batch_entropy_100bins": 0.9718621359045295,
|
|
"calibration/batch_entropy_10bins": 0.984952501452715,
|
|
"calibration/batch_entropy_50bins": 0.9805238677858856,
|
|
"calibration/batch_uniqueness": 0.954693603515625,
|
|
"calibration/buffer_distribution_entropy": 0.9989409552203344,
|
|
"calibration/buffer_entropy_100bins": 0.9990398793740454,
|
|
"calibration/buffer_entropy_10bins": 0.9989409552203344,
|
|
"calibration/buffer_entropy_50bins": 0.9990565379611495,
|
|
"calibration/confidence_entropy": 0.4897216704300299,
|
|
"calibration/coverage@0%": 0.01171875,
|
|
"calibration/coverage@1%": 0.01171875,
|
|
"calibration/coverage@10%": 0.104296875,
|
|
"calibration/coverage@15%": 0.2359375,
|
|
"calibration/coverage@20%": 0.308984375,
|
|
"calibration/coverage@25%": 0.3640625,
|
|
"calibration/coverage@30%": 0.5078125,
|
|
"calibration/coverage@5%": 0.012109375,
|
|
"calibration/ece": 0.13947520019378662,
|
|
"calibration/mean_confidence": 0.5198152612431157,
|
|
"calibration/prompt_uniqueness": 0.84501953125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 652.8,
|
|
"completions/max_terminated_length": 458.2,
|
|
"completions/mean_length": 188.3568359375,
|
|
"completions/mean_terminated_length": 188.22555847167968,
|
|
"completions/min_length": 101.8,
|
|
"completions/min_terminated_length": 101.8,
|
|
"epoch": 0.96,
|
|
"grad_norm": 0.0008233313565142453,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 1014339790.0,
|
|
"reward": 0.8462372064590454,
|
|
"reward_std": 0.07700852155685425,
|
|
"rewards/accuracy_reward": 0.5169921875,
|
|
"rewards/brier_reward": 0.8101608753204346,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0029094903729856014,
|
|
"rewards/frontier_coverage_1": 0.13561428487300872,
|
|
"rewards/frontier_coverage_10": 0.13452683985233307,
|
|
"rewards/frontier_coverage_15": 0.1212164431810379,
|
|
"rewards/frontier_coverage_20": 0.07751094549894333,
|
|
"rewards/frontier_coverage_25": 0.05298488959670067,
|
|
"rewards/frontier_coverage_5": 0.135471972823143,
|
|
"rewards/frontier_ece_reward": 0.0022863436490297316,
|
|
"rewards/frontier_entropy_batch_reward": -0.20344921350479125,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0708251953125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.162890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.0995680645108223,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.696875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03541259765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03541259765625,
|
|
"signal/advantage_abs_mean": 0.05835134610533714,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05835134610533714,
|
|
"signal/advantage_pre_scale_std": 0.09123541563749313,
|
|
"signal/advantage_std": 0.09123541563749313,
|
|
"signal/brier_reward/centered_abs_mean": 0.1000775396823883,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8765625,
|
|
"signal/brier_reward/group_std_mean": 0.12929840236902237,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012509692460298538,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012509692460298538,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023374527459964155,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.745703125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003886171476915479,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.184040299151093e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.184040299151093e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14079618453979492,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.882421875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18256475627422333,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002520251739770174,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002520251739770174,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13965638279914855,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.181106236577034,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024998491164296864,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024998491164296864,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12565270364284514,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16323770582675934,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022491833195090296,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022491833195090296,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0731646478176117,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.895703125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0953644946217537,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013096471317112445,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013096471317112445,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05031884089112282,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.93359375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06426827237010002,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009007072076201439,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009007072076201439,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14063106179237367,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18235519230365754,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025172959081828593,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025172959081828593,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0048952271230518814,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.89609375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006365635897964239,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006119033903814852,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006119033903814852,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26704747676849366,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73671875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33781918287277224,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03338093459606171,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03338093459606171,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"eval_calibration/aurc": 0.4575152377276934,
|
|
"eval_calibration/batch_distribution_entropy": 0.9083536036370418,
|
|
"eval_calibration/batch_entropy_100bins": 0.7084673039767129,
|
|
"eval_calibration/batch_entropy_10bins": 0.9083536036370418,
|
|
"eval_calibration/batch_entropy_50bins": 0.7941923347176845,
|
|
"eval_calibration/batch_uniqueness": 0.8974609375,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9987965314883902,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9989874140319082,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9987965314883902,
|
|
"eval_calibration/buffer_entropy_50bins": 0.999000163227866,
|
|
"eval_calibration/confidence_entropy": 0.4877032802303043,
|
|
"eval_calibration/coverage@0%": 0.0859375,
|
|
"eval_calibration/coverage@1%": 0.0859375,
|
|
"eval_calibration/coverage@10%": 0.0859375,
|
|
"eval_calibration/coverage@15%": 0.09375,
|
|
"eval_calibration/coverage@20%": 0.1015625,
|
|
"eval_calibration/coverage@25%": 0.1484375,
|
|
"eval_calibration/coverage@30%": 0.15625,
|
|
"eval_calibration/coverage@5%": 0.0859375,
|
|
"eval_calibration/ece": 0.20039907035790783,
|
|
"eval_calibration/mean_confidence": 0.4575770178494413,
|
|
"eval_calibration/prompt_uniqueness": 0.8974609375,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 295.25,
|
|
"eval_completions/max_terminated_length": 295.25,
|
|
"eval_completions/mean_length": 189.70380020141602,
|
|
"eval_completions/mean_terminated_length": 189.70380020141602,
|
|
"eval_completions/min_length": 116.75,
|
|
"eval_completions/min_terminated_length": 116.75,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 1014339790.0,
|
|
"eval_reward": 0.7071669399738312,
|
|
"eval_reward_std": 0.226898942142725,
|
|
"eval_rewards/accuracy_reward": 0.427734375,
|
|
"eval_rewards/brier_reward": 0.807328924536705,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.003334582201205194,
|
|
"eval_rewards/frontier_coverage_1": 0.20210690423846245,
|
|
"eval_rewards/frontier_coverage_10": 0.2005590945482254,
|
|
"eval_rewards/frontier_coverage_15": 0.18310636281967163,
|
|
"eval_rewards/frontier_coverage_20": 0.10963826067745686,
|
|
"eval_rewards/frontier_coverage_25": 0.05915482249110937,
|
|
"eval_rewards/frontier_coverage_5": 0.20197707042098045,
|
|
"eval_rewards/frontier_ece_reward": 0.002569766016677022,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 17.0706,
|
|
"eval_samples_per_second": 29.29,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4730224609375,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4935857355594635,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23651123046875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23651123046875,
|
|
"eval_signal/advantage_abs_mean": 0.21183785423636436,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21183785423636436,
|
|
"eval_signal/advantage_pre_scale_std": 0.22447463124990463,
|
|
"eval_signal/advantage_std": 0.22447463124990463,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.1758808195590973,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.9296875,
|
|
"eval_signal/brier_reward/group_std_mean": 0.22358601912856102,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02198510244488716,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02198510244488716,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004215276916511357,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.640625,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008374640950933099,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.545345397375058e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.545345397375058e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3614576756954193,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.430373378098011,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00647009233944118,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00647009233944118,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.35852116346359253,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.42694830149412155,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0064175286097452044,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0064175286097452044,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.32429099828004837,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9453125,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.387409083545208,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005804808693937957,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005804808693937957,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.1720643900334835,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8984375,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.21237896382808685,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030799524392932653,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030799524392932653,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.08441895246505737,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9453125,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.1048442255705595,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015110991662368178,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015110991662368178,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3611843213438988,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4300566017627716,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006465199403464794,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006465199403464794,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.006295109633356333,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.008172678295522928,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007868887041695416,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007868887041695416,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.234,
|
|
"step": 300
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26956602079129766,
|
|
"calibration/batch_distribution_entropy": 0.975847157951916,
|
|
"calibration/batch_entropy_100bins": 0.9649908946480957,
|
|
"calibration/batch_entropy_10bins": 0.975847157951916,
|
|
"calibration/batch_entropy_50bins": 0.9739094070267618,
|
|
"calibration/batch_uniqueness": 0.95203857421875,
|
|
"calibration/buffer_distribution_entropy": 0.9987995026989941,
|
|
"calibration/buffer_entropy_100bins": 0.9989820996482228,
|
|
"calibration/buffer_entropy_10bins": 0.9987995026989941,
|
|
"calibration/buffer_entropy_50bins": 0.9989938509658627,
|
|
"calibration/confidence_entropy": 0.5116255959622197,
|
|
"calibration/coverage@0%": 0.014453125,
|
|
"calibration/coverage@1%": 0.014453125,
|
|
"calibration/coverage@10%": 0.26015625,
|
|
"calibration/coverage@15%": 0.36953125,
|
|
"calibration/coverage@20%": 0.46171875,
|
|
"calibration/coverage@25%": 0.51875,
|
|
"calibration/coverage@30%": 0.59140625,
|
|
"calibration/coverage@5%": 0.1109375,
|
|
"calibration/ece": 0.13497489066439078,
|
|
"calibration/mean_confidence": 0.49586762510521404,
|
|
"calibration/prompt_uniqueness": 0.840478515625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 517.2,
|
|
"completions/max_terminated_length": 517.2,
|
|
"completions/mean_length": 192.8546875,
|
|
"completions/mean_terminated_length": 192.8546875,
|
|
"completions/min_length": 104.6,
|
|
"completions/min_terminated_length": 104.6,
|
|
"epoch": 0.976,
|
|
"grad_norm": 0.00106345908716321,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 1031175742.0,
|
|
"reward": 0.8589818477630615,
|
|
"reward_std": 0.08459014743566513,
|
|
"rewards/accuracy_reward": 0.554296875,
|
|
"rewards/brier_reward": 0.7969029545783997,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0025948323775082825,
|
|
"rewards/frontier_coverage_1": 0.10237730550579727,
|
|
"rewards/frontier_coverage_10": 0.10188237186521291,
|
|
"rewards/frontier_coverage_15": 0.09465207355096936,
|
|
"rewards/frontier_coverage_20": 0.066153160110116,
|
|
"rewards/frontier_coverage_25": 0.04752057008445263,
|
|
"rewards/frontier_coverage_5": 0.102328123152256,
|
|
"rewards/frontier_ece_reward": 0.001362017064820975,
|
|
"rewards/frontier_entropy_batch_reward": -0.2165709674358368,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0876708984375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12027212083339692,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04383544921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04383544921875,
|
|
"signal/advantage_abs_mean": 0.06527443826198578,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06527443826198578,
|
|
"signal/advantage_pre_scale_std": 0.09959482550621032,
|
|
"signal/advantage_std": 0.09959482550621032,
|
|
"signal/brier_reward/centered_abs_mean": 0.10134001821279526,
|
|
"signal/brier_reward/group_bin_occupancy": 0.856640625,
|
|
"signal/brier_reward/group_std_mean": 0.13163287788629532,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012667502276599407,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012667502276599407,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002029223274439573,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.730078125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0033127402421087027,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.632309453678317e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.632309453678317e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15118311196565629,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86640625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1950996220111847,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002706177672371268,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002706177672371268,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15004239976406097,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.865234375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1936278909444809,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002685758890584111,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002685758890584111,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13605864495038986,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.863671875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1756308764219284,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002435449743643403,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002435449743643403,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0769604966044426,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.890234375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09997625052928924,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013775928178802132,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013775928178802132,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04939193576574326,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9171875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06393922716379166,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008841155911795795,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008841155911795795,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1510834127664566,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86640625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19497024416923522,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002704393118619919,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002704393118619919,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004813673906028271,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8953125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006306731514632702,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006017092382535338,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006017092382535338,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2821305632591248,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73046875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3564057588577271,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0352663204073906,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0352663204073906,
|
|
"step": 305
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34615962203217165,
|
|
"calibration/batch_distribution_entropy": 0.9822478080256127,
|
|
"calibration/batch_entropy_100bins": 0.9710197463283367,
|
|
"calibration/batch_entropy_10bins": 0.9822478080256127,
|
|
"calibration/batch_entropy_50bins": 0.9798762014394601,
|
|
"calibration/batch_uniqueness": 0.9536600873594491,
|
|
"calibration/buffer_distribution_entropy": 0.998897547628759,
|
|
"calibration/buffer_entropy_100bins": 0.999052160716665,
|
|
"calibration/buffer_entropy_10bins": 0.998897547628759,
|
|
"calibration/buffer_entropy_50bins": 0.9990733510872982,
|
|
"calibration/confidence_entropy": 0.4787588483252154,
|
|
"calibration/coverage@0%": 0.01914826932485323,
|
|
"calibration/coverage@1%": 0.01914826932485323,
|
|
"calibration/coverage@10%": 0.06448752446183953,
|
|
"calibration/coverage@15%": 0.11417563600782779,
|
|
"calibration/coverage@20%": 0.1626505931996086,
|
|
"calibration/coverage@25%": 0.4001697040117417,
|
|
"calibration/coverage@30%": 0.483008959148728,
|
|
"calibration/coverage@5%": 0.03282014432485323,
|
|
"calibration/ece": 0.11840179079530369,
|
|
"calibration/mean_confidence": 0.4507503788278635,
|
|
"calibration/prompt_uniqueness": 0.8376709746520552,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 606.0,
|
|
"completions/max_terminated_length": 390.0,
|
|
"completions/mean_length": 189.25205078125,
|
|
"completions/mean_terminated_length": 189.1205261230469,
|
|
"completions/min_length": 102.8,
|
|
"completions/min_terminated_length": 102.8,
|
|
"epoch": 0.992,
|
|
"grad_norm": 0.0010464171646162868,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 1048242163.0,
|
|
"reward": 0.8496513366699219,
|
|
"reward_std": 0.08171502947807312,
|
|
"rewards/accuracy_reward": 0.53125,
|
|
"rewards/brier_reward": 0.8029752850532532,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002545328298583627,
|
|
"rewards/frontier_coverage_1": 0.13065127432346343,
|
|
"rewards/frontier_coverage_10": 0.12966825366020202,
|
|
"rewards/frontier_coverage_15": 0.11746386885643005,
|
|
"rewards/frontier_coverage_20": 0.07562436014413834,
|
|
"rewards/frontier_coverage_25": 0.057191865891218184,
|
|
"rewards/frontier_coverage_5": 0.13061045855283737,
|
|
"rewards/frontier_ece_reward": 0.0018452441552653908,
|
|
"rewards/frontier_entropy_batch_reward": -0.2236760824918747,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.083056640625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.163671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.10889570638537407,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0415283203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0415283203125,
|
|
"signal/advantage_abs_mean": 0.06335543915629387,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06335543915629387,
|
|
"signal/advantage_pre_scale_std": 0.0979221597313881,
|
|
"signal/advantage_std": 0.0979221597313881,
|
|
"signal/brier_reward/centered_abs_mean": 0.10264453142881394,
|
|
"signal/brier_reward/group_bin_occupancy": 0.843359375,
|
|
"signal/brier_reward/group_std_mean": 0.131204953789711,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012830566428601742,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012830566428601742,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020562576595693828,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.74609375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0034046342596411707,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.680701047414914e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.680701047414914e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15361351668834686,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.866796875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1951121598482132,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00274968184530735,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00274968184530735,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1525499314069748,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.866796875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1937567949295044,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027306437492370605,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027306437492370605,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13804518431425095,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.862109375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1754040390253067,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024710086872801185,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024710086872801185,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07695982903242111,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.89375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09852775484323502,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001377580827102065,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001377580827102065,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05279005914926529,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.92890625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06735634654760361,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009449420729652047,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009449420729652047,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15354090929031372,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.866796875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19501928389072418,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002748382231220603,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002748382231220603,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004886813275516033,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.918359375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0063082781620323655,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006108516594395041,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006108516594395041,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27488839626312256,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.717578125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35261892080307006,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03436104953289032,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03436104953289032,
|
|
"step": 310
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2688797599130984,
|
|
"calibration/batch_distribution_entropy": 0.9675266719184563,
|
|
"calibration/batch_entropy_100bins": 0.9649150285306254,
|
|
"calibration/batch_entropy_10bins": 0.9675266719184563,
|
|
"calibration/batch_entropy_50bins": 0.9690713029080542,
|
|
"calibration/batch_uniqueness": 0.951904296875,
|
|
"calibration/buffer_distribution_entropy": 0.9989472583325865,
|
|
"calibration/buffer_entropy_100bins": 0.9990723220281166,
|
|
"calibration/buffer_entropy_10bins": 0.9989472583325865,
|
|
"calibration/buffer_entropy_50bins": 0.9991024787924342,
|
|
"calibration/confidence_entropy": 0.46889996447870286,
|
|
"calibration/coverage@0%": 0.01953125,
|
|
"calibration/coverage@1%": 0.01953125,
|
|
"calibration/coverage@10%": 0.03515625,
|
|
"calibration/coverage@15%": 0.130859375,
|
|
"calibration/coverage@20%": 0.2314453125,
|
|
"calibration/coverage@25%": 0.4111328125,
|
|
"calibration/coverage@30%": 0.73046875,
|
|
"calibration/coverage@5%": 0.02734375,
|
|
"calibration/ece": 0.14754415918657318,
|
|
"calibration/mean_confidence": 0.581622264145669,
|
|
"calibration/prompt_uniqueness": 0.8184814453125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 380.5,
|
|
"completions/max_terminated_length": 380.5,
|
|
"completions/mean_length": 186.61524963378906,
|
|
"completions/mean_terminated_length": 186.61524963378906,
|
|
"completions/min_length": 106.5,
|
|
"completions/min_terminated_length": 106.5,
|
|
"epoch": 0.9984,
|
|
"num_tokens": 1055012517.0,
|
|
"reward": 0.8589463829994202,
|
|
"reward_std": 0.08560431376099586,
|
|
"rewards/accuracy_reward": 0.558837890625,
|
|
"rewards/brier_reward": 0.7794443070888519,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0029379306361079216,
|
|
"rewards/frontier_coverage_1": 0.07742930576205254,
|
|
"rewards/frontier_coverage_10": 0.0766817256808281,
|
|
"rewards/frontier_coverage_15": 0.06769302859902382,
|
|
"rewards/frontier_coverage_20": 0.04756389185786247,
|
|
"rewards/frontier_coverage_25": 0.04449248127639294,
|
|
"rewards/frontier_coverage_5": 0.07734929025173187,
|
|
"rewards/frontier_ece_reward": 0.0017147985054180026,
|
|
"rewards/frontier_entropy_batch_reward": -0.20054014027118683,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0843353271484375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1689453125,
|
|
"signal/accuracy_reward/group_std_mean": 0.117337416857481,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6484375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04216766357421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04216766357421875,
|
|
"signal/advantage_abs_mean": 0.06579190492630005,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06579190492630005,
|
|
"signal/advantage_pre_scale_std": 0.10127944126725197,
|
|
"signal/advantage_std": 0.10127944126725197,
|
|
"signal/brier_reward/centered_abs_mean": 0.11069391667842865,
|
|
"signal/brier_reward/group_bin_occupancy": 0.86328125,
|
|
"signal/brier_reward/group_std_mean": 0.13984516263008118,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013836739584803581,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013836739584803581,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025345467729493976,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7607421875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004020490450784564,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.536838969215751e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.536838969215751e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14754138886928558,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8544921875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1890631541609764,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026409910060465336,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026409910060465336,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14654190093278885,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8544921875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1877775639295578,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002623099833726883,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002623099833726883,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13195443153381348,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8564453125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16884687542915344,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023619841085746884,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023619841085746884,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07070561498403549,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8916015625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09012233838438988,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001265630533453077,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001265630533453077,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04924464598298073,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.92578125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06359815411269665,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008814791508484632,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008814791508484632,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1474618911743164,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.85546875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18895908445119858,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026395675959065557,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026395675959065557,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004972347756847739,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.9130859375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006475380156189203,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006215434696059674,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006215434696059674,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2751055657863617,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.712890625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3516087681055069,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03438819572329521,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.125,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03438819572329521,
|
|
"step": 312,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.0041191707219135985,
|
|
"train_runtime": 59190.2375,
|
|
"train_samples_per_second": 0.338,
|
|
"train_steps_per_second": 0.005
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 312,
|
|
"num_input_tokens_seen": 1055012517,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|