6365 lines
405 KiB
JSON
6365 lines
405 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.49919376007799904,
|
|
"eval_steps": 50,
|
|
"global_step": 208,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.5100526402040415,
|
|
"calibration/batch_distribution_entropy": 0.29006821757016016,
|
|
"calibration/confidence_entropy": 0.2233175770524201,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.47929644203736255,
|
|
"calibration/mean_confidence": 0.9111617651712166,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.018142361111111116,
|
|
"completions/max_length": 4043.0,
|
|
"completions/max_terminated_length": 4043.0,
|
|
"completions/mean_length": 515.587939453125,
|
|
"completions/mean_terminated_length": 525.1046997070313,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.8,
|
|
"epoch": 0.011999850001874977,
|
|
"grad_norm": 0.005509195849299431,
|
|
"learning_rate": 5.952380952380953e-07,
|
|
"loss": 0.0067,
|
|
"num_tokens": 9053781.0,
|
|
"reward": 0.6579087376594543,
|
|
"reward_std": 0.6609244465827941,
|
|
"rewards/accgated_coverage_0": 0.2749160468578339,
|
|
"rewards/accgated_coverage_1": 0.2749160468578339,
|
|
"rewards/accgated_coverage_10": 0.2749160468578339,
|
|
"rewards/accgated_coverage_15": 0.2749160468578339,
|
|
"rewards/accgated_coverage_20": 0.2749160468578339,
|
|
"rewards/accgated_coverage_25": 0.2749160468578339,
|
|
"rewards/accgated_coverage_5": 0.2749160468578339,
|
|
"rewards/accuracy_reward": 0.2602430522441864,
|
|
"rewards/brier_reward": 0.3124499797821045,
|
|
"rewards/confidence_uniqueness_reward": 0.2885975897312164,
|
|
"rewards/format_reward": 0.6042534708976746,
|
|
"rewards/frontier_aurc_reward": 0.2749160468578339,
|
|
"rewards/frontier_ece_reward": 0.2749160468578339,
|
|
"rewards/frontier_entropy_batch_reward": -0.5781359553337098,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.3091569423675537,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.36718695163726806,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.3091569423675537,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.36718695163726806,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.3091569423675537,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.36718695163726806,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.3091569423675537,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.36718695163726806,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.3091569423675537,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.36718695163726806,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.3091569423675537,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.36718695163726806,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.3091569423675537,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.36718695163726806,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.30717231035232545,
|
|
"signal/accuracy_reward/group_std_mean": 0.3660562574863434,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.10000000074505806,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15358615517616273,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15358615517616273,
|
|
"signal/advantage_abs_mean": 0.5620251536369324,
|
|
"signal/advantage_pre_scale_abs_mean": 0.5620251536369324,
|
|
"signal/advantage_pre_scale_std": 0.6785519242286682,
|
|
"signal/advantage_std": 0.6785519242286682,
|
|
"signal/brier_reward/centered_abs_mean": 0.3185951590538025,
|
|
"signal/brier_reward/group_std_mean": 0.3719545781612396,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03185951597988605,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03185951597988605,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2353891611099243,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.28763567209243773,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023538917675614356,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023538917675614356,
|
|
"signal/format_reward/centered_abs_mean": 0.4384060263633728,
|
|
"signal/format_reward/group_std_mean": 0.4737804293632507,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2192030131816864,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.2192030131816864,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.3091569423675537,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.36718695163726806,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038644616957753896,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038644616957753896,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.3091569423675537,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.36718695163726806,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030915693566203117,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4478966951370239,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4815566301345825,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.044789671897888184,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.044789671897888184,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5252471496664904,
|
|
"calibration/batch_distribution_entropy": 0.25438653899177793,
|
|
"calibration/confidence_entropy": 0.22173958000055646,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.471072254833574,
|
|
"calibration/mean_confidence": 0.9203944885303501,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.016232638888888908,
|
|
"completions/max_length": 4008.4,
|
|
"completions/max_terminated_length": 4008.4,
|
|
"completions/mean_length": 482.696630859375,
|
|
"completions/mean_terminated_length": 490.9005981445313,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 7.8,
|
|
"epoch": 0.023999700003749954,
|
|
"grad_norm": 0.004243232775479555,
|
|
"learning_rate": 1.1904761904761906e-06,
|
|
"loss": 0.0057,
|
|
"num_tokens": 17697166.0,
|
|
"reward": 0.7655806422233582,
|
|
"reward_std": 0.6458010196685791,
|
|
"rewards/accgated_coverage_0": 0.3147114455699921,
|
|
"rewards/accgated_coverage_1": 0.3147114455699921,
|
|
"rewards/accgated_coverage_10": 0.3147114455699921,
|
|
"rewards/accgated_coverage_15": 0.3147114455699921,
|
|
"rewards/accgated_coverage_20": 0.3147114455699921,
|
|
"rewards/accgated_coverage_25": 0.3147114455699921,
|
|
"rewards/accgated_coverage_5": 0.3147114455699921,
|
|
"rewards/accuracy_reward": 0.2984375,
|
|
"rewards/brier_reward": 0.3611126124858856,
|
|
"rewards/confidence_uniqueness_reward": 0.3588679790496826,
|
|
"rewards/format_reward": 0.7130208492279053,
|
|
"rewards/frontier_aurc_reward": 0.3147114455699921,
|
|
"rewards/frontier_ece_reward": 0.3147114455699921,
|
|
"rewards/frontier_entropy_batch_reward": -0.6784967422485352,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.3180821776390076,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.37312147617340086,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.3180821776390076,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.37312147617340086,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.3180821776390076,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.37312147617340086,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.3180821776390076,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.37312147617340086,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.3180821776390076,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.37312147617340086,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.3180821776390076,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.37312147617340086,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.3180821776390076,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.37312147617340086,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.3222547709941864,
|
|
"signal/accuracy_reward/group_std_mean": 0.3800747275352478,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.0777777798473835,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1611273854970932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1611273854970932,
|
|
"signal/advantage_abs_mean": 0.5453694581985473,
|
|
"signal/advantage_pre_scale_abs_mean": 0.5453694581985473,
|
|
"signal/advantage_pre_scale_std": 0.6612506985664368,
|
|
"signal/advantage_std": 0.6612506985664368,
|
|
"signal/brier_reward/centered_abs_mean": 0.3204429686069489,
|
|
"signal/brier_reward/group_std_mean": 0.3723629653453827,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03204429745674133,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03204429745674133,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.22323725521564483,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.28084011673927306,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022323725372552873,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022323725372552873,
|
|
"signal/format_reward/centered_abs_mean": 0.3584092855453491,
|
|
"signal/format_reward/group_std_mean": 0.42185020446777344,
|
|
"signal/format_reward/group_zero_std_frac": 0.00555555559694767,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.17920464277267456,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.17920464277267456,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.3180821776390076,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.37312147617340086,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003976027341559529,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003976027341559529,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.3180821776390076,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.37312147617340086,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03180821873247623,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3863869488239288,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.44503648281097413,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03863869607448578,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03863869607448578,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5362176870303669,
|
|
"calibration/batch_distribution_entropy": 0.31565510576164746,
|
|
"calibration/confidence_entropy": 0.24680094421113,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4811362427740694,
|
|
"calibration/mean_confidence": 0.9068744050179488,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010069444444444464,
|
|
"completions/max_length": 3983.8,
|
|
"completions/max_terminated_length": 3983.8,
|
|
"completions/mean_length": 456.1573852539062,
|
|
"completions/mean_terminated_length": 460.8325134277344,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 55.2,
|
|
"epoch": 0.03599955000562493,
|
|
"grad_norm": 0.0021033829543739557,
|
|
"learning_rate": 1.7857142857142859e-06,
|
|
"loss": -0.0113,
|
|
"num_tokens": 26054083.0,
|
|
"reward": 0.9580986380577088,
|
|
"reward_std": 0.555222624540329,
|
|
"rewards/accgated_coverage_0": 0.37856481671333314,
|
|
"rewards/accgated_coverage_1": 0.37856481671333314,
|
|
"rewards/accgated_coverage_10": 0.37856481671333314,
|
|
"rewards/accgated_coverage_15": 0.37856481671333314,
|
|
"rewards/accgated_coverage_20": 0.37856481671333314,
|
|
"rewards/accgated_coverage_25": 0.37856481671333314,
|
|
"rewards/accgated_coverage_5": 0.37856481671333314,
|
|
"rewards/accuracy_reward": 0.3509548604488373,
|
|
"rewards/brier_reward": 0.4531111419200897,
|
|
"rewards/confidence_uniqueness_reward": 0.507493644952774,
|
|
"rewards/format_reward": 0.9373263835906982,
|
|
"rewards/frontier_aurc_reward": 0.37856481671333314,
|
|
"rewards/frontier_ece_reward": 0.37856481671333314,
|
|
"rewards/frontier_entropy_batch_reward": -0.8968637108802795,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.3092223465442657,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.3651528418064117,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.3092223465442657,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.3651528418064117,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.3092223465442657,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.3651528418064117,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.3092223465442657,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.3651528418064117,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.3092223465442657,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.3651528418064117,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.3092223465442657,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.3651528418064117,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.3092223465442657,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.3651528418064117,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.31787651777267456,
|
|
"signal/accuracy_reward/group_std_mean": 0.37809754610061647,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.07500000149011612,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15893825888633728,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15893825888633728,
|
|
"signal/advantage_abs_mean": 0.46249008774757383,
|
|
"signal/advantage_pre_scale_abs_mean": 0.46249008774757383,
|
|
"signal/advantage_pre_scale_std": 0.5720869541168213,
|
|
"signal/advantage_std": 0.5720869541168213,
|
|
"signal/brier_reward/centered_abs_mean": 0.2960561692714691,
|
|
"signal/brier_reward/group_std_mean": 0.3486612796783447,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029605618491768838,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.029605618491768838,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1899004429578781,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.23581856489181519,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01899004392325878,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01899004392325878,
|
|
"signal/format_reward/centered_abs_mean": 0.10666232705116271,
|
|
"signal/format_reward/group_std_mean": 0.1854827418923378,
|
|
"signal/format_reward/group_zero_std_frac": 0.3083333432674408,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05333116352558136,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.05333116352558136,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.3092223465442657,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3651528418064117,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038652794901281595,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038652794901281595,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.3092223465442657,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3651528418064117,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030922235921025276,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17015037536621094,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.27351958155632017,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.10277778059244155,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017015037685632707,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017015037685632707,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.43721687510369617,
|
|
"calibration/batch_distribution_entropy": 0.40072087939511974,
|
|
"calibration/buffer_distribution_entropy": 0.316298540027451,
|
|
"calibration/confidence_entropy": 0.3090699022802787,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.03133159268929504,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3651260156369419,
|
|
"calibration/mean_confidence": 0.8837088543583007,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010416666666666652,
|
|
"completions/max_length": 3783.2,
|
|
"completions/max_terminated_length": 3783.2,
|
|
"completions/mean_length": 484.31285400390624,
|
|
"completions/mean_terminated_length": 489.445556640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 86.4,
|
|
"epoch": 0.04799940000749991,
|
|
"grad_norm": 0.0007687319302931428,
|
|
"learning_rate": 2.380952380952381e-06,
|
|
"loss": -0.0081,
|
|
"num_tokens": 34747063.0,
|
|
"reward": 0.8955905079841614,
|
|
"reward_std": 0.33118437230587006,
|
|
"rewards/accgated_coverage_0": 0.19059269838035106,
|
|
"rewards/accgated_coverage_1": 0.19059269838035106,
|
|
"rewards/accgated_coverage_10": 0.19059269838035106,
|
|
"rewards/accgated_coverage_15": 0.19059269838035106,
|
|
"rewards/accgated_coverage_20": 0.19059269838035106,
|
|
"rewards/accgated_coverage_25": 0.19059269838035106,
|
|
"rewards/accgated_coverage_5": 0.19059269838035106,
|
|
"rewards/accuracy_reward": 0.4530381917953491,
|
|
"rewards/brier_reward": 0.5732760310173035,
|
|
"rewards/confidence_uniqueness_reward": 0.5944553971290588,
|
|
"rewards/format_reward": 0.9848090171813965,
|
|
"rewards/frontier_aurc_reward": 0.18578348318114876,
|
|
"rewards/frontier_ece_reward": 0.18266455382108687,
|
|
"rewards/frontier_entropy_batch_reward": -0.9410987496376038,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.11304162042215467,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.1411184100434184,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.011304162652231753,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.011304162652231753,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.11304162042215467,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.1411184100434184,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.011304162652231753,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.011304162652231753,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.11304162042215467,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.1411184100434184,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.011304162652231753,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.011304162652231753,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.11304162042215467,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.1411184100434184,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.011304162652231753,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.011304162652231753,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.11304162042215467,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.1411184100434184,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.011304162652231753,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.011304162652231753,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.11304162042215467,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.1411184100434184,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.011304162652231753,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.011304162652231753,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.11304162042215467,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.1411184100434184,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.011304162652231753,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.011304162652231753,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.28528103828430174,
|
|
"signal/accuracy_reward/group_std_mean": 0.35118488073348997,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.10000000149011612,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.14264051914215087,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.14264051914215087,
|
|
"signal/advantage_abs_mean": 0.26579251885414124,
|
|
"signal/advantage_pre_scale_abs_mean": 0.26579251885414124,
|
|
"signal/advantage_pre_scale_std": 0.3466378539800644,
|
|
"signal/advantage_std": 0.3466378539800644,
|
|
"signal/brier_reward/centered_abs_mean": 0.24700996279716492,
|
|
"signal/brier_reward/group_std_mean": 0.30329431891441344,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024700997024774553,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.024700997024774553,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.18052109479904174,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.21723917722702027,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018052110448479652,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018052110448479652,
|
|
"signal/format_reward/centered_abs_mean": 0.02763129323720932,
|
|
"signal/format_reward/group_std_mean": 0.06006475985050201,
|
|
"signal/format_reward/group_zero_std_frac": 0.725,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01381564661860466,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01381564661860466,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.1107280052267015,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.13636438250541688,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0013841001396940555,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0013841001396940555,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.19009114503860475,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.22972893118858337,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0190091148018837,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0190091148018837,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1023610308766365,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.19736399948596955,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.24444444477558136,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010236102901399136,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010236102901399136,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3437265656341763,
|
|
"calibration/batch_distribution_entropy": 0.5656963876081482,
|
|
"calibration/buffer_distribution_entropy": 0.36739076478574584,
|
|
"calibration/confidence_entropy": 0.38956441308086304,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.007387862796833773,
|
|
"calibration/coverage@15%": 0.0079155672823219,
|
|
"calibration/coverage@20%": 0.1128265344182127,
|
|
"calibration/coverage@25%": 0.21094365165978254,
|
|
"calibration/coverage@30%": 0.5211917227624034,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.23278605898703247,
|
|
"calibration/mean_confidence": 0.837022899828915,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010329861111111116,
|
|
"completions/max_length": 3925.4,
|
|
"completions/max_terminated_length": 3925.4,
|
|
"completions/mean_length": 528.5024353027344,
|
|
"completions/mean_terminated_length": 534.0195007324219,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 105.0,
|
|
"epoch": 0.05999925000937488,
|
|
"grad_norm": 0.0007447434472851455,
|
|
"learning_rate": 2.9761904761904763e-06,
|
|
"loss": -0.0059,
|
|
"num_tokens": 43959859.0,
|
|
"reward": 0.8110012769699096,
|
|
"reward_std": 0.20988352298736573,
|
|
"rewards/accgated_coverage_0": -0.0003714228863827884,
|
|
"rewards/accgated_coverage_1": -0.0003714228863827884,
|
|
"rewards/accgated_coverage_10": -0.0003714228863827884,
|
|
"rewards/accgated_coverage_15": -0.0003714228863827884,
|
|
"rewards/accgated_coverage_20": -0.0003714228863827884,
|
|
"rewards/accgated_coverage_25": -0.0003714228863827884,
|
|
"rewards/accgated_coverage_5": -0.0003714228863827884,
|
|
"rewards/accuracy_reward": 0.5520833313465119,
|
|
"rewards/brier_reward": 0.6689429998397827,
|
|
"rewards/confidence_uniqueness_reward": 0.6822849154472351,
|
|
"rewards/format_reward": 0.9855902791023254,
|
|
"rewards/frontier_aurc_reward": -0.003991629648953676,
|
|
"rewards/frontier_ece_reward": 0.013007813505828381,
|
|
"rewards/frontier_entropy_batch_reward": -0.9394919872283936,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.012830922938883304,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.019968029484152792,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0012830922496505082,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0012830922496505082,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.012830922938883304,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.019968029484152792,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0012830922496505082,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0012830922496505082,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.012830922938883304,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.019968029484152792,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0012830922496505082,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0012830922496505082,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.012830922938883304,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.019968029484152792,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0012830922496505082,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0012830922496505082,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.012830922938883304,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.019968029484152792,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0012830922496505082,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0012830922496505082,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.012830922938883304,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.019968029484152792,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0012830922496505082,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0012830922496505082,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.012830922938883304,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.019968029484152792,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0012830922496505082,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0012830922496505082,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.27000868022441865,
|
|
"signal/accuracy_reward/group_std_mean": 0.3324960470199585,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.13888888955116271,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13500434011220933,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.13500434011220933,
|
|
"signal/advantage_abs_mean": 0.16560438871383668,
|
|
"signal/advantage_pre_scale_abs_mean": 0.16560438871383668,
|
|
"signal/advantage_pre_scale_std": 0.2258564442396164,
|
|
"signal/advantage_std": 0.2258564442396164,
|
|
"signal/brier_reward/centered_abs_mean": 0.20755743682384492,
|
|
"signal/brier_reward/group_std_mean": 0.25747441351413725,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020755743607878685,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020755743607878685,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.12232207953929901,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.15422678291797637,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012232208624482155,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012232208624482155,
|
|
"signal/format_reward/centered_abs_mean": 0.02601996473968029,
|
|
"signal/format_reward/group_std_mean": 0.053718936443328855,
|
|
"signal/format_reward/group_zero_std_frac": 0.7638888835906983,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013009982369840144,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013009982369840144,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002716710604727268,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004093794990330935,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3958881977014245e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3958881977014245e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.1095162957906723,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.1322493463754654,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.010951629653573037,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.010951629653573037,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10384236574172974,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.20167314410209655,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.28611111342906953,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010384235717356204,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010384235717356204,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29093421091370353,
|
|
"calibration/batch_distribution_entropy": 0.6750995975665728,
|
|
"calibration/buffer_distribution_entropy": 0.45378212856528916,
|
|
"calibration/confidence_entropy": 0.4977573778236459,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.05318784766795036,
|
|
"calibration/coverage@25%": 0.21265174577746065,
|
|
"calibration/coverage@30%": 0.7557201183853943,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.12327898021567016,
|
|
"calibration/mean_confidence": 0.7645972398248402,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.016840277777777767,
|
|
"completions/max_length": 3997.6,
|
|
"completions/max_terminated_length": 3997.6,
|
|
"completions/mean_length": 594.1150146484375,
|
|
"completions/mean_terminated_length": 604.371875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 121.6,
|
|
"epoch": 0.07199910001124986,
|
|
"grad_norm": 0.000512155529577285,
|
|
"learning_rate": 3.5714285714285718e-06,
|
|
"loss": -0.007,
|
|
"num_tokens": 53913984.0,
|
|
"reward": 0.8377140998840332,
|
|
"reward_std": 0.18636699616909028,
|
|
"rewards/accgated_coverage_0": -0.004444451769813895,
|
|
"rewards/accgated_coverage_1": -0.004444451769813895,
|
|
"rewards/accgated_coverage_10": -0.004444451769813895,
|
|
"rewards/accgated_coverage_15": -0.004444451769813895,
|
|
"rewards/accgated_coverage_20": -0.004444451769813895,
|
|
"rewards/accgated_coverage_25": -0.004444451769813895,
|
|
"rewards/accgated_coverage_5": -0.004444451769813895,
|
|
"rewards/accuracy_reward": 0.600000011920929,
|
|
"rewards/brier_reward": 0.7193986296653747,
|
|
"rewards/confidence_uniqueness_reward": 0.7030134916305542,
|
|
"rewards/format_reward": 0.9794270873069764,
|
|
"rewards/frontier_aurc_reward": -0.0032217550091445447,
|
|
"rewards/frontier_ece_reward": 0.01657584626227617,
|
|
"rewards/frontier_entropy_batch_reward": -0.927468454837799,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.022231166064739228,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.031239988654851912,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0022231166949495673,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0022231166949495673,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.022231166064739228,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.031239988654851912,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0022231166949495673,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0022231166949495673,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.022231166064739228,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.031239988654851912,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0022231166949495673,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0022231166949495673,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.022231166064739228,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.031239988654851912,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0022231166949495673,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0022231166949495673,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.022231166064739228,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.031239988654851912,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0022231166949495673,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0022231166949495673,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.022231166064739228,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.031239988654851912,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0022231166949495673,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0022231166949495673,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.022231166064739228,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.031239988654851912,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0022231166949495673,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0022231166949495673,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.23517795503139496,
|
|
"signal/accuracy_reward/group_std_mean": 0.29286627769470214,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.23055555820465087,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11758897751569748,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11758897751569748,
|
|
"signal/advantage_abs_mean": 0.14355775713920593,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14355775713920593,
|
|
"signal/advantage_pre_scale_std": 0.20758473575115205,
|
|
"signal/advantage_std": 0.20758473575115205,
|
|
"signal/brier_reward/centered_abs_mean": 0.16719190776348114,
|
|
"signal/brier_reward/group_std_mean": 0.21108767986297608,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016719191148877145,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016719191148877145,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1300051212310791,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.16288177371025087,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01300051212310791,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01300051212310791,
|
|
"signal/format_reward/centered_abs_mean": 0.03359917588531971,
|
|
"signal/format_reward/group_std_mean": 0.06414167955517769,
|
|
"signal/format_reward/group_zero_std_frac": 0.7333333373069764,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016799587942659854,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.016799587942659854,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017353732837364078,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0027700068429112436,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1692166410502977e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1692166410502977e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06365966349840164,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08195510059595108,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006365966238081455,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006365966238081455,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.12449786216020584,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.23548357784748078,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.21388889253139495,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012449786253273486,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.012449786253273486,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24915260747354795,
|
|
"calibration/batch_distribution_entropy": 0.6926192490991758,
|
|
"calibration/buffer_distribution_entropy": 0.5422166465452627,
|
|
"calibration/confidence_entropy": 0.5485382320909491,
|
|
"calibration/coverage@0%": 0.012073490813648294,
|
|
"calibration/coverage@1%": 0.012073490813648294,
|
|
"calibration/coverage@10%": 0.08183765599642892,
|
|
"calibration/coverage@15%": 0.09896438274551941,
|
|
"calibration/coverage@20%": 0.3158131348849086,
|
|
"calibration/coverage@25%": 0.5002561456375967,
|
|
"calibration/coverage@30%": 0.8,
|
|
"calibration/coverage@5%": 0.015223097112860892,
|
|
"calibration/ece": 0.07716275614743286,
|
|
"calibration/mean_confidence": 0.7135749575163094,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.020225694444444463,
|
|
"completions/max_length": 4023.0,
|
|
"completions/max_terminated_length": 4023.0,
|
|
"completions/mean_length": 624.6056518554688,
|
|
"completions/mean_terminated_length": 637.64052734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 163.8,
|
|
"epoch": 0.08399895001312484,
|
|
"grad_norm": 0.0005128980264998972,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": -0.0118,
|
|
"num_tokens": 64186881.0,
|
|
"reward": 0.8555383443832397,
|
|
"reward_std": 0.17517488598823547,
|
|
"rewards/accgated_coverage_0": -0.01074592862278223,
|
|
"rewards/accgated_coverage_1": -0.01074592862278223,
|
|
"rewards/accgated_coverage_10": -0.01074592862278223,
|
|
"rewards/accgated_coverage_15": -0.01074592862278223,
|
|
"rewards/accgated_coverage_20": -0.01074592862278223,
|
|
"rewards/accgated_coverage_25": -0.01074592862278223,
|
|
"rewards/accgated_coverage_5": -0.01074592862278223,
|
|
"rewards/accuracy_reward": 0.6256076455116272,
|
|
"rewards/brier_reward": 0.744514000415802,
|
|
"rewards/confidence_uniqueness_reward": 0.7291248798370361,
|
|
"rewards/format_reward": 0.9771701335906983,
|
|
"rewards/frontier_aurc_reward": -0.0027033671736717223,
|
|
"rewards/frontier_ece_reward": 0.009526663832366467,
|
|
"rewards/frontier_entropy_batch_reward": -0.8661114573478699,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.03372676484286785,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.044625566154718396,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0033726765774190425,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0033726765774190425,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.03372676484286785,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.044625566154718396,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0033726765774190425,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0033726765774190425,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.03372676484286785,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.044625566154718396,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0033726765774190425,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0033726765774190425,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.03372676484286785,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.044625566154718396,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0033726765774190425,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0033726765774190425,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.03372676484286785,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.044625566154718396,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0033726765774190425,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0033726765774190425,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.03372676484286785,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.044625566154718396,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0033726765774190425,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0033726765774190425,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.03372676484286785,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.044625566154718396,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0033726765774190425,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0033726765774190425,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.21490342915058136,
|
|
"signal/accuracy_reward/group_std_mean": 0.2722406297922134,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.26666666865348815,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10745171457529068,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10745171457529068,
|
|
"signal/advantage_abs_mean": 0.132158000767231,
|
|
"signal/advantage_pre_scale_abs_mean": 0.132158000767231,
|
|
"signal/advantage_pre_scale_std": 0.1931176006793976,
|
|
"signal/advantage_std": 0.1931176006793976,
|
|
"signal/brier_reward/centered_abs_mean": 0.1418018341064453,
|
|
"signal/brier_reward/group_std_mean": 0.18220576047897338,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014180183783173561,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014180183783173561,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.12838705331087114,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.15706448554992675,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012838705442845821,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012838705442845821,
|
|
"signal/format_reward/centered_abs_mean": 0.03528103269636631,
|
|
"signal/format_reward/group_std_mean": 0.06216970533132553,
|
|
"signal/format_reward/group_zero_std_frac": 0.7583333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.017640516348183154,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.017640516348183154,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001337575796060264,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002151899482123554,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.671969730523415e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.671969730523415e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04378785789012909,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06361446380615235,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004378785844892263,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004378785844892263,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21290639340877532,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33500158190727236,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.09166666641831397,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021290638856589793,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021290638856589793,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27114436702428224,
|
|
"calibration/batch_distribution_entropy": 0.8070451575586078,
|
|
"calibration/buffer_distribution_entropy": 0.6061007181434863,
|
|
"calibration/confidence_entropy": 0.5118545178162875,
|
|
"calibration/coverage@0%": 0.0037735849056603774,
|
|
"calibration/coverage@1%": 0.0037735849056603774,
|
|
"calibration/coverage@10%": 0.005929919137466307,
|
|
"calibration/coverage@15%": 0.057831138761233844,
|
|
"calibration/coverage@20%": 0.2453923415759484,
|
|
"calibration/coverage@25%": 0.48747760107551913,
|
|
"calibration/coverage@30%": 0.6787977944126284,
|
|
"calibration/coverage@5%": 0.0037735849056603774,
|
|
"calibration/ece": 0.10934041252550494,
|
|
"calibration/mean_confidence": 0.7113611514039639,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.018750000000000024,
|
|
"completions/max_length": 3989.2,
|
|
"completions/max_terminated_length": 3989.2,
|
|
"completions/mean_length": 631.6644165039063,
|
|
"completions/mean_terminated_length": 643.7576416015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 168.2,
|
|
"epoch": 0.09599880001499982,
|
|
"grad_norm": 0.0004933858290314674,
|
|
"learning_rate": 4.761904761904762e-06,
|
|
"loss": -0.0147,
|
|
"num_tokens": 74583175.0,
|
|
"reward": 0.9134387254714966,
|
|
"reward_std": 0.17632506489753724,
|
|
"rewards/accgated_coverage_0": -0.008661169628612696,
|
|
"rewards/accgated_coverage_1": -0.008661169628612696,
|
|
"rewards/accgated_coverage_10": -0.008661169628612696,
|
|
"rewards/accgated_coverage_15": -0.008661169628612696,
|
|
"rewards/accgated_coverage_20": -0.008661169628612696,
|
|
"rewards/accgated_coverage_25": -0.008661169628612696,
|
|
"rewards/accgated_coverage_5": -0.008661169628612696,
|
|
"rewards/accuracy_reward": 0.6469618082046509,
|
|
"rewards/brier_reward": 0.7537546157836914,
|
|
"rewards/confidence_uniqueness_reward": 0.8956602692604065,
|
|
"rewards/format_reward": 0.9789930582046509,
|
|
"rewards/frontier_aurc_reward": -0.00252625304274261,
|
|
"rewards/frontier_ece_reward": 0.004404827463440597,
|
|
"rewards/frontier_entropy_batch_reward": -0.5882636129856109,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.04707988202571869,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.06582793518900872,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.004707988444715738,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.004707988444715738,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.04707988202571869,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.06582793518900872,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.004707988444715738,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.004707988444715738,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.04707988202571869,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06582793518900872,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.004707988444715738,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.004707988444715738,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.04707988202571869,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06582793518900872,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.004707988444715738,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.004707988444715738,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04707988202571869,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.06582793518900872,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004707988444715738,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004707988444715738,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.04707988202571869,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.06582793518900872,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004707988444715738,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004707988444715738,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.04707988202571869,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.06582793518900872,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.004707988444715738,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.004707988444715738,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19857313632965087,
|
|
"signal/accuracy_reward/group_std_mean": 0.26094743609428406,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.26388889253139497,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09928656816482544,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09928656816482544,
|
|
"signal/advantage_abs_mean": 0.13168415129184724,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13168415129184724,
|
|
"signal/advantage_pre_scale_std": 0.1955942243337631,
|
|
"signal/advantage_std": 0.1955942243337631,
|
|
"signal/brier_reward/centered_abs_mean": 0.15957279205322267,
|
|
"signal/brier_reward/group_std_mean": 0.2055341988801956,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01595727913081646,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01595727913081646,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07235777825117111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10315420925617218,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007235778030008078,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007235778030008078,
|
|
"signal/format_reward/centered_abs_mean": 0.0353515625,
|
|
"signal/format_reward/group_std_mean": 0.06308476254343987,
|
|
"signal/format_reward/group_zero_std_frac": 0.7527777910232544,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01767578125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01767578125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019746162462979553,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032937098294496536,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4682703951839358e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4682703951839358e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05753466859459877,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09539404213428497,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005753467138856649,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005753467138856649,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4239261865615845,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4854666531085968,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04239262193441391,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04239262193441391,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22503516674594654,
|
|
"calibration/batch_distribution_entropy": 0.936723476211095,
|
|
"calibration/buffer_distribution_entropy": 0.6664189748711518,
|
|
"calibration/confidence_entropy": 0.4939127257586667,
|
|
"calibration/coverage@0%": 0.0005208333333333333,
|
|
"calibration/coverage@1%": 0.0005208333333333333,
|
|
"calibration/coverage@10%": 0.029163725780200794,
|
|
"calibration/coverage@15%": 0.1030003198965398,
|
|
"calibration/coverage@20%": 0.431992266240601,
|
|
"calibration/coverage@25%": 0.8520846136304583,
|
|
"calibration/coverage@30%": 0.9671087533156498,
|
|
"calibration/coverage@5%": 0.0005208333333333333,
|
|
"calibration/ece": 0.16665695734001001,
|
|
"calibration/mean_confidence": 0.6389931033855959,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017361111111111115,
|
|
"completions/max_length": 3583.8,
|
|
"completions/max_terminated_length": 3583.8,
|
|
"completions/mean_length": 662.6905395507813,
|
|
"completions/mean_terminated_length": 674.4956298828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 186.6,
|
|
"epoch": 0.1079986500168748,
|
|
"grad_norm": 0.0004158303199801594,
|
|
"learning_rate": 4.909638554216868e-06,
|
|
"loss": -0.016,
|
|
"num_tokens": 85352634.0,
|
|
"reward": 0.9387454271316529,
|
|
"reward_std": 0.17266902327537537,
|
|
"rewards/accgated_coverage_0": -0.01770685804076493,
|
|
"rewards/accgated_coverage_1": -0.01770685804076493,
|
|
"rewards/accgated_coverage_10": -0.01770685804076493,
|
|
"rewards/accgated_coverage_15": -0.01770685804076493,
|
|
"rewards/accgated_coverage_20": -0.01770685804076493,
|
|
"rewards/accgated_coverage_25": -0.01770685804076493,
|
|
"rewards/accgated_coverage_5": -0.01770685804076493,
|
|
"rewards/accuracy_reward": 0.647569453716278,
|
|
"rewards/brier_reward": 0.7374800324440003,
|
|
"rewards/confidence_uniqueness_reward": 0.9320099115371704,
|
|
"rewards/format_reward": 0.9820312619209289,
|
|
"rewards/frontier_aurc_reward": -0.002436440950259566,
|
|
"rewards/frontier_ece_reward": -0.0011549136601388455,
|
|
"rewards/frontier_entropy_batch_reward": -0.30463194847106934,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.08323955237865448,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.11306367218494415,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008323955349624157,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008323955349624157,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.08323955237865448,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.11306367218494415,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008323955349624157,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008323955349624157,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.08323955237865448,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.11306367218494415,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008323955349624157,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008323955349624157,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.08323955237865448,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.11306367218494415,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008323955349624157,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008323955349624157,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.08323955237865448,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.11306367218494415,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008323955349624157,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008323955349624157,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.08323955237865448,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.11306367218494415,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008323955349624157,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008323955349624157,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.08323955237865448,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.11306367218494415,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008323955349624157,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008323955349624157,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1957465261220932,
|
|
"signal/accuracy_reward/group_std_mean": 0.2632771462202072,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.23888889253139495,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0978732630610466,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0978732630610466,
|
|
"signal/advantage_abs_mean": 0.12990787327289582,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12990787327289582,
|
|
"signal/advantage_pre_scale_std": 0.1868172764778137,
|
|
"signal/advantage_std": 0.1868172764778137,
|
|
"signal/brier_reward/centered_abs_mean": 0.20081567168235778,
|
|
"signal/brier_reward/group_std_mean": 0.25101232826709746,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020081567019224165,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020081567019224165,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04166658595204353,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06948793828487396,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0041666587349027395,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0041666587349027395,
|
|
"signal/format_reward/centered_abs_mean": 0.03059353269636631,
|
|
"signal/format_reward/group_std_mean": 0.05648069083690643,
|
|
"signal/format_reward/group_zero_std_frac": 0.7722222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015296766348183156,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.015296766348183156,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002271978510543704,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003890362149104476,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8399731672834604e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8399731672834604e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05871346145868302,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0988737851381302,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0058713463135063645,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0058713463135063645,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.36136391162872317,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.43039796948432923,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.036136391758918765,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.036136391758918765,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.41323223140525994,
|
|
"calibration/batch_distribution_entropy": 0.9674160680358039,
|
|
"calibration/buffer_distribution_entropy": 0.7243289379368653,
|
|
"calibration/confidence_entropy": 0.4737487432824161,
|
|
"calibration/coverage@0%": 0.005841469816272966,
|
|
"calibration/coverage@1%": 0.005841469816272966,
|
|
"calibration/coverage@10%": 0.005841469816272966,
|
|
"calibration/coverage@15%": 0.011226555171169642,
|
|
"calibration/coverage@20%": 0.03386231438051376,
|
|
"calibration/coverage@25%": 0.04409075280684254,
|
|
"calibration/coverage@30%": 0.22428921542039948,
|
|
"calibration/coverage@5%": 0.005841469816272966,
|
|
"calibration/ece": 0.2079400346436131,
|
|
"calibration/mean_confidence": 0.6007060708093117,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015017361111111094,
|
|
"completions/max_length": 3660.8,
|
|
"completions/max_terminated_length": 3660.8,
|
|
"completions/mean_length": 669.3516479492188,
|
|
"completions/mean_terminated_length": 679.5452392578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 165.8,
|
|
"epoch": 0.11999850001874976,
|
|
"grad_norm": 0.0004630894400179386,
|
|
"learning_rate": 4.759036144578314e-06,
|
|
"loss": -0.0142,
|
|
"num_tokens": 96161165.0,
|
|
"reward": 0.9375450730323791,
|
|
"reward_std": 0.16306346654891968,
|
|
"rewards/accgated_coverage_0": -0.008981034625321627,
|
|
"rewards/accgated_coverage_1": -0.008981034625321627,
|
|
"rewards/accgated_coverage_10": -0.008981034625321627,
|
|
"rewards/accgated_coverage_15": -0.008981034625321627,
|
|
"rewards/accgated_coverage_20": -0.008981034625321627,
|
|
"rewards/accgated_coverage_25": -0.008981034625321627,
|
|
"rewards/accgated_coverage_5": -0.008981034625321627,
|
|
"rewards/accuracy_reward": 0.6288194537162781,
|
|
"rewards/brier_reward": 0.7329934477806092,
|
|
"rewards/confidence_uniqueness_reward": 0.9333699584007263,
|
|
"rewards/format_reward": 0.9846354246139526,
|
|
"rewards/frontier_aurc_reward": -0.002544494904577732,
|
|
"rewards/frontier_ece_reward": 0.007357514463365078,
|
|
"rewards/frontier_entropy_batch_reward": -0.30235905647277833,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.08796186000108719,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.119529028236866,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00879618600010872,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00879618600010872,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.08796186000108719,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.119529028236866,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00879618600010872,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00879618600010872,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.08796186000108719,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.119529028236866,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00879618600010872,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00879618600010872,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.08796186000108719,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.119529028236866,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00879618600010872,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00879618600010872,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.08796186000108719,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.119529028236866,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.00879618600010872,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.00879618600010872,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.08796186000108719,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.119529028236866,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.00879618600010872,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.00879618600010872,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.08796186000108719,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.119529028236866,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00879618600010872,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00879618600010872,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18984375,
|
|
"signal/accuracy_reward/group_std_mean": 0.2495385080575943,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2944444417953491,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.094921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.094921875,
|
|
"signal/advantage_abs_mean": 0.12400750964879989,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12400750964879989,
|
|
"signal/advantage_pre_scale_std": 0.17833105027675628,
|
|
"signal/advantage_std": 0.17833105027675628,
|
|
"signal/brier_reward/centered_abs_mean": 0.21165235340595245,
|
|
"signal/brier_reward/group_std_mean": 0.2621103286743164,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021165235340595244,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021165235340595244,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03903521485626697,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06291390731930732,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0039035214576870203,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0039035214576870203,
|
|
"signal/format_reward/centered_abs_mean": 0.025244140625,
|
|
"signal/format_reward/group_std_mean": 0.046590489149093625,
|
|
"signal/format_reward/group_zero_std_frac": 0.8111111164093018,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0126220703125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0126220703125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024692637380212545,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003953724354505539,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.086579599766992e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.086579599766992e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05328697934746742,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08139285743236542,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005328698176890611,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005328698176890611,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3716325283050537,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.43904575109481814,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.037163253873586655,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.037163253873586655,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.11999850001874976,
|
|
"eval_calibration/aurc": 0.24055350759374375,
|
|
"eval_calibration/batch_distribution_entropy": 0.8994038710520571,
|
|
"eval_calibration/buffer_distribution_entropy": 0.7535534438871466,
|
|
"eval_calibration/confidence_entropy": 0.46967083146270466,
|
|
"eval_calibration/coverage@0%": 0.09442204301075269,
|
|
"eval_calibration/coverage@1%": 0.09442204301075269,
|
|
"eval_calibration/coverage@10%": 0.09442204301075269,
|
|
"eval_calibration/coverage@15%": 0.302755376344086,
|
|
"eval_calibration/coverage@20%": 0.3602150537634408,
|
|
"eval_calibration/coverage@25%": 0.7044690860215054,
|
|
"eval_calibration/coverage@30%": 0.9097782258064516,
|
|
"eval_calibration/coverage@5%": 0.09442204301075269,
|
|
"eval_calibration/ece": 0.24757249249593236,
|
|
"eval_calibration/mean_confidence": 0.6110164873464803,
|
|
"eval_completions/clipped_ratio": 0.016493055555555563,
|
|
"eval_completions/max_length": 2924.0,
|
|
"eval_completions/max_terminated_length": 2924.0,
|
|
"eval_completions/mean_length": 646.5880432128906,
|
|
"eval_completions/mean_terminated_length": 657.3784790039062,
|
|
"eval_completions/min_length": 48.5,
|
|
"eval_completions/min_terminated_length": 235.16666666666666,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 96161165.0,
|
|
"eval_reward": 0.8702710568904877,
|
|
"eval_reward_std": 0.23088541626930237,
|
|
"eval_rewards/accgated_coverage_0": -0.007695769192650914,
|
|
"eval_rewards/accgated_coverage_1": -0.007695769192650914,
|
|
"eval_rewards/accgated_coverage_10": -0.007695769192650914,
|
|
"eval_rewards/accgated_coverage_15": -0.007695769192650914,
|
|
"eval_rewards/accgated_coverage_20": -0.007695769192650914,
|
|
"eval_rewards/accgated_coverage_25": -0.007695769192650914,
|
|
"eval_rewards/accgated_coverage_5": -0.007695769192650914,
|
|
"eval_rewards/accuracy_reward": 0.6388888955116272,
|
|
"eval_rewards/brier_reward": 0.7428200940291086,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8793094853560129,
|
|
"eval_rewards/format_reward": 0.9826388955116272,
|
|
"eval_rewards/frontier_aurc_reward": -0.002335130760911852,
|
|
"eval_rewards/frontier_ece_reward": 0.009743184200488031,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9826388955116272,
|
|
"eval_runtime": 196.7222,
|
|
"eval_samples_per_second": 5.083,
|
|
"eval_signal/accgated_coverage_0/centered_abs_mean": 0.10488361865282059,
|
|
"eval_signal/accgated_coverage_0/group_std_mean": 0.15331803013881048,
|
|
"eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.010488361585885286,
|
|
"eval_signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.010488361585885286,
|
|
"eval_signal/accgated_coverage_1/centered_abs_mean": 0.10488361865282059,
|
|
"eval_signal/accgated_coverage_1/group_std_mean": 0.15331803013881048,
|
|
"eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.010488361585885286,
|
|
"eval_signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.010488361585885286,
|
|
"eval_signal/accgated_coverage_10/centered_abs_mean": 0.10488361865282059,
|
|
"eval_signal/accgated_coverage_10/group_std_mean": 0.15331803013881048,
|
|
"eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.010488361585885286,
|
|
"eval_signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.010488361585885286,
|
|
"eval_signal/accgated_coverage_15/centered_abs_mean": 0.10488361865282059,
|
|
"eval_signal/accgated_coverage_15/group_std_mean": 0.15331803013881048,
|
|
"eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.010488361585885286,
|
|
"eval_signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.010488361585885286,
|
|
"eval_signal/accgated_coverage_20/centered_abs_mean": 0.10488361865282059,
|
|
"eval_signal/accgated_coverage_20/group_std_mean": 0.15331803013881048,
|
|
"eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.010488361585885286,
|
|
"eval_signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.010488361585885286,
|
|
"eval_signal/accgated_coverage_25/centered_abs_mean": 0.10488361865282059,
|
|
"eval_signal/accgated_coverage_25/group_std_mean": 0.15331803013881048,
|
|
"eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.010488361585885286,
|
|
"eval_signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.010488361585885286,
|
|
"eval_signal/accgated_coverage_5/centered_abs_mean": 0.10488361865282059,
|
|
"eval_signal/accgated_coverage_5/group_std_mean": 0.15331803013881048,
|
|
"eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.010488361585885286,
|
|
"eval_signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.010488361585885286,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4456380208333333,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.47888515889644623,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22281901041666666,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22281901041666666,
|
|
"eval_signal/advantage_abs_mean": 0.19127274056275687,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.19127274056275687,
|
|
"eval_signal/advantage_pre_scale_std": 0.22979939977327982,
|
|
"eval_signal/advantage_std": 0.22979939977327982,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2277601733803749,
|
|
"eval_signal/brier_reward/group_std_mean": 0.28239578008651733,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022776018207271893,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.022776018207271893,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.06234860916932424,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.10822617262601852,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006234860823800166,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006234860823800166,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.03320312515522043,
|
|
"eval_signal/format_reward/group_std_mean": 0.08625163851926725,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.5555555646618208,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.016601562577610213,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.016601562577610213,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002441129821818322,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004314790751474599,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0514122651463065e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0514122651463065e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.046028090020020805,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.06924534775316715,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0046028091649835306,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0046028091649835306,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.03320312515522043,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.08625163851926725,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.5555555646618208,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0033203125737297037,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0033203125737297037,
|
|
"eval_steps_per_second": 0.03,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2413598864691334,
|
|
"calibration/batch_distribution_entropy": 0.9695181837472722,
|
|
"calibration/buffer_distribution_entropy": 0.7721430753136292,
|
|
"calibration/confidence_entropy": 0.493656003511313,
|
|
"calibration/coverage@0%": 0.01855932303109915,
|
|
"calibration/coverage@1%": 0.01855932303109915,
|
|
"calibration/coverage@10%": 0.1056984306426477,
|
|
"calibration/coverage@15%": 0.2471195636637226,
|
|
"calibration/coverage@20%": 0.4991462525626197,
|
|
"calibration/coverage@25%": 0.6108610535718757,
|
|
"calibration/coverage@30%": 0.6864563230590697,
|
|
"calibration/coverage@5%": 0.05110525478962933,
|
|
"calibration/ece": 0.17133809174350081,
|
|
"calibration/mean_confidence": 0.5723697325482006,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.018576388888888906,
|
|
"completions/max_length": 3474.6,
|
|
"completions/max_terminated_length": 3474.6,
|
|
"completions/mean_length": 683.1578247070313,
|
|
"completions/mean_terminated_length": 696.117626953125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 161.6,
|
|
"epoch": 0.13199835002062474,
|
|
"grad_norm": 0.0004027851391583681,
|
|
"learning_rate": 4.60843373493976e-06,
|
|
"loss": -0.0177,
|
|
"num_tokens": 107111719.0,
|
|
"reward": 0.9484212160110473,
|
|
"reward_std": 0.1675104558467865,
|
|
"rewards/accgated_coverage_0": -0.007159726228564978,
|
|
"rewards/accgated_coverage_1": -0.007159726228564978,
|
|
"rewards/accgated_coverage_10": -0.007159726228564978,
|
|
"rewards/accgated_coverage_15": -0.007159726228564978,
|
|
"rewards/accgated_coverage_20": -0.007159726228564978,
|
|
"rewards/accgated_coverage_25": -0.007159726228564978,
|
|
"rewards/accgated_coverage_5": -0.007159726228564978,
|
|
"rewards/accuracy_reward": 0.6403645873069763,
|
|
"rewards/brier_reward": 0.7422665119171142,
|
|
"rewards/confidence_uniqueness_reward": 0.9326986908912659,
|
|
"rewards/format_reward": 0.981250011920929,
|
|
"rewards/frontier_aurc_reward": -0.002089855796657503,
|
|
"rewards/frontier_ece_reward": 0.007551212143152952,
|
|
"rewards/frontier_entropy_batch_reward": -0.2559980094432831,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.09629447013139725,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.12818139046430588,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.009629447385668755,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.009629447385668755,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.09629447013139725,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.12818139046430588,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.009629447385668755,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.009629447385668755,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.09629447013139725,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.12818139046430588,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.009629447385668755,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.009629447385668755,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.09629447013139725,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.12818139046430588,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.009629447385668755,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.009629447385668755,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.09629447013139725,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.12818139046430588,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.009629447385668755,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.009629447385668755,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.09629447013139725,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.12818139046430588,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.009629447385668755,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.009629447385668755,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.09629447013139725,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.12818139046430588,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.009629447385668755,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.009629447385668755,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1913899749517441,
|
|
"signal/accuracy_reward/group_std_mean": 0.25248887240886686,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2833333373069763,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09569498747587205,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09569498747587205,
|
|
"signal/advantage_abs_mean": 0.12650371789932252,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12650371789932252,
|
|
"signal/advantage_pre_scale_std": 0.1835579603910446,
|
|
"signal/advantage_std": 0.1835579603910446,
|
|
"signal/brier_reward/centered_abs_mean": 0.2032104343175888,
|
|
"signal/brier_reward/group_std_mean": 0.25411616265773773,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020321043208241463,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020321043208241463,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04227638766169548,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0690533883869648,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00422763884998858,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00422763884998858,
|
|
"signal/format_reward/centered_abs_mean": 0.03132595531642437,
|
|
"signal/format_reward/group_std_mean": 0.056155077368021014,
|
|
"signal/format_reward/group_zero_std_frac": 0.7805555582046508,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015662977658212184,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.015662977658212184,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017812325153499843,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0028733307030051948,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2265406369115225e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2265406369115225e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.046052918583154676,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06805866062641144,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004605291876941919,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004605291876941919,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3332373261451721,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40649659633636476,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033323732763528825,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033323732763528825,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3141651107938186,
|
|
"calibration/batch_distribution_entropy": 0.9078290881449693,
|
|
"calibration/buffer_distribution_entropy": 0.7981656224187011,
|
|
"calibration/confidence_entropy": 0.48061793223665383,
|
|
"calibration/coverage@0%": 0.0068981182316749405,
|
|
"calibration/coverage@1%": 0.0068981182316749405,
|
|
"calibration/coverage@10%": 0.009025777806143026,
|
|
"calibration/coverage@15%": 0.2827577680892725,
|
|
"calibration/coverage@20%": 0.40764372400036875,
|
|
"calibration/coverage@25%": 0.4818494291715309,
|
|
"calibration/coverage@30%": 0.6399625689519306,
|
|
"calibration/coverage@5%": 0.0068981182316749405,
|
|
"calibration/ece": 0.1617690875291686,
|
|
"calibration/mean_confidence": 0.6608954671026755,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01605902777777779,
|
|
"completions/max_length": 3923.2,
|
|
"completions/max_terminated_length": 3923.2,
|
|
"completions/mean_length": 697.619970703125,
|
|
"completions/mean_terminated_length": 709.0864135742188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 191.2,
|
|
"epoch": 0.14399820002249972,
|
|
"grad_norm": 0.0003930129169020802,
|
|
"learning_rate": 4.457831325301205e-06,
|
|
"loss": -0.0158,
|
|
"num_tokens": 118244877.0,
|
|
"reward": 0.9352314114570618,
|
|
"reward_std": 0.16064883172512054,
|
|
"rewards/accgated_coverage_0": 0.004846313409507275,
|
|
"rewards/accgated_coverage_1": 0.004846313409507275,
|
|
"rewards/accgated_coverage_10": 0.004846313409507275,
|
|
"rewards/accgated_coverage_15": 0.004846313409507275,
|
|
"rewards/accgated_coverage_20": 0.004846313409507275,
|
|
"rewards/accgated_coverage_25": 0.004846313409507275,
|
|
"rewards/accgated_coverage_5": 0.004846313409507275,
|
|
"rewards/accuracy_reward": 0.6222222208976745,
|
|
"rewards/brier_reward": 0.7681910872459412,
|
|
"rewards/confidence_uniqueness_reward": 0.922529149055481,
|
|
"rewards/format_reward": 0.983593761920929,
|
|
"rewards/frontier_aurc_reward": -0.002249367954209447,
|
|
"rewards/frontier_ece_reward": 0.0151396244764328,
|
|
"rewards/frontier_entropy_batch_reward": -0.4162684798240662,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.06432908028364182,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.09131758958101273,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.006432908400893211,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.006432908400893211,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.06432908028364182,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.09131758958101273,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.006432908400893211,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.006432908400893211,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.06432908028364182,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.09131758958101273,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.006432908400893211,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.006432908400893211,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.06432908028364182,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.09131758958101273,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006432908400893211,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006432908400893211,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.06432908028364182,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.09131758958101273,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.006432908400893211,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.006432908400893211,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.06432908028364182,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.09131758958101273,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.006432908400893211,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.006432908400893211,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.06432908028364182,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.09131758958101273,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.006432908400893211,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.006432908400893211,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19360894560813904,
|
|
"signal/accuracy_reward/group_std_mean": 0.25349748134613037,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.28611111342906953,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09680447280406952,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09680447280406952,
|
|
"signal/advantage_abs_mean": 0.12054670006036758,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12054670006036758,
|
|
"signal/advantage_pre_scale_std": 0.18156724274158478,
|
|
"signal/advantage_std": 0.18156724274158478,
|
|
"signal/brier_reward/centered_abs_mean": 0.17527975440025328,
|
|
"signal/brier_reward/group_std_mean": 0.22410308420658112,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017527975887060166,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017527975887060166,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.045045085996389386,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06884901076555253,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00450450861826539,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00450450861826539,
|
|
"signal/format_reward/centered_abs_mean": 0.02762044295668602,
|
|
"signal/format_reward/group_std_mean": 0.04820304960012436,
|
|
"signal/format_reward/group_zero_std_frac": 0.8166666626930237,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01381022147834301,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01381022147834301,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021717621013522146,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00348337315954268,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.71470271400176e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.71470271400176e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03851405009627342,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05781885012984276,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003851405158638954,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003851405158638954,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38271217942237856,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.44837799072265627,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03827122002840042,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03827122002840042,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2013986441728523,
|
|
"calibration/batch_distribution_entropy": 0.9596038273329303,
|
|
"calibration/buffer_distribution_entropy": 0.8192018538249008,
|
|
"calibration/confidence_entropy": 0.4998554429294299,
|
|
"calibration/coverage@0%": 0.039043696775094235,
|
|
"calibration/coverage@1%": 0.039043696775094235,
|
|
"calibration/coverage@10%": 0.2842873097864024,
|
|
"calibration/coverage@15%": 0.544249616082647,
|
|
"calibration/coverage@20%": 0.6466941225743404,
|
|
"calibration/coverage@25%": 0.709493229093955,
|
|
"calibration/coverage@30%": 0.7496021220159151,
|
|
"calibration/coverage@5%": 0.10911489599329889,
|
|
"calibration/ece": 0.13580137123877775,
|
|
"calibration/mean_confidence": 0.5785728899541247,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011718749999999977,
|
|
"completions/max_length": 3612.8,
|
|
"completions/max_terminated_length": 3612.8,
|
|
"completions/mean_length": 711.3194580078125,
|
|
"completions/mean_terminated_length": 719.77431640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 174.2,
|
|
"epoch": 0.1559980500243747,
|
|
"grad_norm": 0.0004044240340590477,
|
|
"learning_rate": 4.307228915662651e-06,
|
|
"loss": -0.0106,
|
|
"num_tokens": 129533325.0,
|
|
"reward": 0.9640808582305909,
|
|
"reward_std": 0.1412920206785202,
|
|
"rewards/accgated_coverage_0": -0.0022170018404722215,
|
|
"rewards/accgated_coverage_1": -0.0022170018404722215,
|
|
"rewards/accgated_coverage_10": -0.0022170018404722215,
|
|
"rewards/accgated_coverage_15": -0.0022170018404722215,
|
|
"rewards/accgated_coverage_20": -0.0022170018404722215,
|
|
"rewards/accgated_coverage_25": -0.0022170018404722215,
|
|
"rewards/accgated_coverage_5": -0.0022170018404722215,
|
|
"rewards/accuracy_reward": 0.65234375,
|
|
"rewards/brier_reward": 0.7770432949066162,
|
|
"rewards/confidence_uniqueness_reward": 0.938026773929596,
|
|
"rewards/format_reward": 0.9880208373069763,
|
|
"rewards/frontier_aurc_reward": -0.0016461270628497005,
|
|
"rewards/frontier_ece_reward": 0.011523347347974777,
|
|
"rewards/frontier_entropy_batch_reward": -0.2718831717967987,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.08875515162944794,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.11871466189622878,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008875515405088663,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008875515405088663,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.08875515162944794,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.11871466189622878,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008875515405088663,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008875515405088663,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.08875515162944794,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.11871466189622878,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008875515405088663,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008875515405088663,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.08875515162944794,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.11871466189622878,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008875515405088663,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008875515405088663,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.08875515162944794,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.11871466189622878,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008875515405088663,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008875515405088663,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.08875515162944794,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.11871466189622878,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008875515405088663,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008875515405088663,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.08875515162944794,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.11871466189622878,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008875515405088663,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008875515405088663,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17527669221162795,
|
|
"signal/accuracy_reward/group_std_mean": 0.23397073447704314,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.325,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08763834610581397,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08763834610581397,
|
|
"signal/advantage_abs_mean": 0.10502037107944488,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10502037107944488,
|
|
"signal/advantage_pre_scale_std": 0.15789510905742646,
|
|
"signal/advantage_std": 0.15789510905742646,
|
|
"signal/brier_reward/centered_abs_mean": 0.16230989396572112,
|
|
"signal/brier_reward/group_std_mean": 0.20729315876960755,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0162309892475605,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0162309892475605,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03273056633770466,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05453848391771317,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032730566337704657,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032730566337704657,
|
|
"signal/format_reward/centered_abs_mean": 0.020746527798473836,
|
|
"signal/format_reward/group_std_mean": 0.040246833488345145,
|
|
"signal/format_reward/group_zero_std_frac": 0.8305555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010373263899236918,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010373263899236918,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012965922243893147,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0021312762284651397,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6207403132284525e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6207403132284525e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.035742347687482835,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05180431827902794,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035742347594350577,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035742347594350577,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3321096122264862,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4016284167766571,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03321096152067184,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03321096152067184,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2615254020463663,
|
|
"calibration/batch_distribution_entropy": 0.9140696171294641,
|
|
"calibration/buffer_distribution_entropy": 0.8353317778810793,
|
|
"calibration/confidence_entropy": 0.4293006215562638,
|
|
"calibration/coverage@0%": 0.006300687857756918,
|
|
"calibration/coverage@1%": 0.006300687857756918,
|
|
"calibration/coverage@10%": 0.1367663808861394,
|
|
"calibration/coverage@15%": 0.2703061500937195,
|
|
"calibration/coverage@20%": 0.32851268883109996,
|
|
"calibration/coverage@25%": 0.3903056792973686,
|
|
"calibration/coverage@30%": 0.6369962387631143,
|
|
"calibration/coverage@5%": 0.07840595101565165,
|
|
"calibration/ece": 0.1320950617526681,
|
|
"calibration/mean_confidence": 0.6382482101514636,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011892361111111093,
|
|
"completions/max_length": 3493.2,
|
|
"completions/max_terminated_length": 3493.2,
|
|
"completions/mean_length": 690.7427001953125,
|
|
"completions/mean_terminated_length": 699.0555297851563,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 171.0,
|
|
"epoch": 0.16799790002624967,
|
|
"grad_norm": 0.00037954424624331295,
|
|
"learning_rate": 4.156626506024097e-06,
|
|
"loss": -0.0124,
|
|
"num_tokens": 140568825.0,
|
|
"reward": 0.9529558777809143,
|
|
"reward_std": 0.14111488908529282,
|
|
"rewards/accgated_coverage_0": 0.01084673785371706,
|
|
"rewards/accgated_coverage_1": 0.01084673785371706,
|
|
"rewards/accgated_coverage_10": 0.01084673785371706,
|
|
"rewards/accgated_coverage_15": 0.01084673785371706,
|
|
"rewards/accgated_coverage_20": 0.01084673785371706,
|
|
"rewards/accgated_coverage_25": 0.01084673785371706,
|
|
"rewards/accgated_coverage_5": 0.01084673785371706,
|
|
"rewards/accuracy_reward": 0.63125,
|
|
"rewards/brier_reward": 0.7789486169815063,
|
|
"rewards/confidence_uniqueness_reward": 0.9293984413146973,
|
|
"rewards/format_reward": 0.9881076455116272,
|
|
"rewards/frontier_aurc_reward": -0.001911242282949388,
|
|
"rewards/frontier_ece_reward": 0.016348773241043092,
|
|
"rewards/frontier_entropy_batch_reward": -0.3676132559776306,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.07885657548904419,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.10609191805124282,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007885657250881195,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007885657250881195,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.07885657548904419,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.10609191805124282,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007885657250881195,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007885657250881195,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.07885657548904419,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.10609191805124282,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007885657250881195,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007885657250881195,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.07885657548904419,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.10609191805124282,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.007885657250881195,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.007885657250881195,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.07885657548904419,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.10609191805124282,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.007885657250881195,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.007885657250881195,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.07885657548904419,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.10609191805124282,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.007885657250881195,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.007885657250881195,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.07885657548904419,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.10609191805124282,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007885657250881195,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007885657250881195,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18004557192325593,
|
|
"signal/accuracy_reward/group_std_mean": 0.23771512806415557,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3222222298383713,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09002278596162797,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09002278596162797,
|
|
"signal/advantage_abs_mean": 0.10413759350776672,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10413759350776672,
|
|
"signal/advantage_pre_scale_std": 0.16083113849163055,
|
|
"signal/advantage_std": 0.16083113849163055,
|
|
"signal/brier_reward/centered_abs_mean": 0.16419869065284728,
|
|
"signal/brier_reward/group_std_mean": 0.2109546959400177,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016419869475066663,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016419869475066663,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03784245103597641,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06083763241767883,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003784245066344738,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003784245066344738,
|
|
"signal/format_reward/centered_abs_mean": 0.020556640811264515,
|
|
"signal/format_reward/group_std_mean": 0.040697479248046876,
|
|
"signal/format_reward/group_zero_std_frac": 0.8250000238418579,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010278320405632257,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010278320405632257,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018522955011576413,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002898959442973137,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3153694564825854e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3153694564825854e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03703445121645928,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.053357198089361194,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037034451961517334,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037034451961517334,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3631279289722443,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.43171375393867495,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03631279319524765,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03631279319524765,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22257555095357223,
|
|
"calibration/batch_distribution_entropy": 0.9400871823295274,
|
|
"calibration/buffer_distribution_entropy": 0.8464505395459649,
|
|
"calibration/confidence_entropy": 0.4936839122169815,
|
|
"calibration/coverage@0%": 0.016386362001433938,
|
|
"calibration/coverage@1%": 0.016386362001433938,
|
|
"calibration/coverage@10%": 0.24796221501980664,
|
|
"calibration/coverage@15%": 0.33072087018791063,
|
|
"calibration/coverage@20%": 0.4732882211713065,
|
|
"calibration/coverage@25%": 0.649742582351726,
|
|
"calibration/coverage@30%": 0.7411867364746947,
|
|
"calibration/coverage@5%": 0.11962625701455729,
|
|
"calibration/ece": 0.14055367731730012,
|
|
"calibration/mean_confidence": 0.6228957550672582,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.008072916666666697,
|
|
"completions/max_length": 2918.4,
|
|
"completions/max_terminated_length": 2918.4,
|
|
"completions/mean_length": 692.5636352539062,
|
|
"completions/mean_terminated_length": 698.1532470703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 199.6,
|
|
"epoch": 0.17999775002812465,
|
|
"grad_norm": 0.00042614529957063496,
|
|
"learning_rate": 4.006024096385543e-06,
|
|
"loss": -0.0077,
|
|
"num_tokens": 151612054.0,
|
|
"reward": 0.9811522126197815,
|
|
"reward_std": 0.1300223708152771,
|
|
"rewards/accgated_coverage_0": 0.0006343376822769642,
|
|
"rewards/accgated_coverage_1": 0.0006343376822769642,
|
|
"rewards/accgated_coverage_10": 0.0006343376822769642,
|
|
"rewards/accgated_coverage_15": 0.0006343376822769642,
|
|
"rewards/accgated_coverage_20": 0.0006343376822769642,
|
|
"rewards/accgated_coverage_25": 0.0006343376822769642,
|
|
"rewards/accgated_coverage_5": 0.0006343376822769642,
|
|
"rewards/accuracy_reward": 0.6845486164093018,
|
|
"rewards/brier_reward": 0.7983974099159241,
|
|
"rewards/confidence_uniqueness_reward": 0.9372838854789733,
|
|
"rewards/format_reward": 0.9917534708976745,
|
|
"rewards/frontier_aurc_reward": -0.0014764688210561871,
|
|
"rewards/frontier_ece_reward": 0.01250108890235424,
|
|
"rewards/frontier_entropy_batch_reward": -0.32242658734321594,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.08589751869440079,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.11587843298912048,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008589751925319434,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008589751925319434,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.08589751869440079,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.11587843298912048,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008589751925319434,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008589751925319434,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.08589751869440079,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.11587843298912048,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008589751925319434,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008589751925319434,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.08589751869440079,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.11587843298912048,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008589751925319434,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008589751925319434,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.08589751869440079,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.11587843298912048,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008589751925319434,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008589751925319434,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.08589751869440079,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.11587843298912048,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008589751925319434,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008589751925319434,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.08589751869440079,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.11587843298912048,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008589751925319434,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008589751925319434,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1838541656732559,
|
|
"signal/accuracy_reward/group_std_mean": 0.23774852454662324,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3416666716337204,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09192708283662795,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09192708283662795,
|
|
"signal/advantage_abs_mean": 0.0963760793209076,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0963760793209076,
|
|
"signal/advantage_pre_scale_std": 0.15004458129405976,
|
|
"signal/advantage_std": 0.15004458129405976,
|
|
"signal/brier_reward/centered_abs_mean": 0.14227010905742646,
|
|
"signal/brier_reward/group_std_mean": 0.1828959047794342,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014227011241018772,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014227011241018772,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029756984487175942,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.050496813654899594,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002975698420777917,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002975698420777917,
|
|
"signal/format_reward/centered_abs_mean": 0.015228949673473834,
|
|
"signal/format_reward/group_std_mean": 0.03322426415979862,
|
|
"signal/format_reward/group_zero_std_frac": 0.85,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007614474836736917,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007614474836736917,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001322699082084,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0020757037913426758,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.653373910812661e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.653373910812661e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03186420500278473,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.046358488500118256,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003186420677229762,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003186420677229762,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33970091938972474,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4100371837615967,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03397009335458279,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03397009335458279,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18373823770605507,
|
|
"calibration/batch_distribution_entropy": 0.9194311712417902,
|
|
"calibration/buffer_distribution_entropy": 0.8560351604828632,
|
|
"calibration/confidence_entropy": 0.4625009964263458,
|
|
"calibration/coverage@0%": 0.049749790760420874,
|
|
"calibration/coverage@1%": 0.08692256562953082,
|
|
"calibration/coverage@10%": 0.34027520819356083,
|
|
"calibration/coverage@15%": 0.3944861092123807,
|
|
"calibration/coverage@20%": 0.6651673002075583,
|
|
"calibration/coverage@25%": 0.7554071648312486,
|
|
"calibration/coverage@30%": 0.8342456742456743,
|
|
"calibration/coverage@5%": 0.10103960315134583,
|
|
"calibration/ece": 0.14968959270054943,
|
|
"calibration/mean_confidence": 0.6513386481500065,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013368055555555558,
|
|
"completions/max_length": 3756.6,
|
|
"completions/max_terminated_length": 3756.6,
|
|
"completions/mean_length": 750.0751831054688,
|
|
"completions/mean_terminated_length": 760.394775390625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 220.6,
|
|
"epoch": 0.19199760002999963,
|
|
"grad_norm": 0.000407382205594331,
|
|
"learning_rate": 3.855421686746989e-06,
|
|
"loss": -0.0129,
|
|
"num_tokens": 163306200.0,
|
|
"reward": 0.9649693131446838,
|
|
"reward_std": 0.1357353910803795,
|
|
"rewards/accgated_coverage_0": -0.00146528814220801,
|
|
"rewards/accgated_coverage_1": -0.00146528814220801,
|
|
"rewards/accgated_coverage_10": -0.00146528814220801,
|
|
"rewards/accgated_coverage_15": -0.00146528814220801,
|
|
"rewards/accgated_coverage_20": -0.00146528814220801,
|
|
"rewards/accgated_coverage_25": -0.00146528814220801,
|
|
"rewards/accgated_coverage_5": -0.00146528814220801,
|
|
"rewards/accuracy_reward": 0.6591145753860473,
|
|
"rewards/brier_reward": 0.7822228908538819,
|
|
"rewards/confidence_uniqueness_reward": 0.9348131895065308,
|
|
"rewards/format_reward": 0.9866319537162781,
|
|
"rewards/frontier_aurc_reward": -0.001650554989464581,
|
|
"rewards/frontier_ece_reward": 0.011803200282156468,
|
|
"rewards/frontier_entropy_batch_reward": -0.2974155843257904,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.08280792832374573,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.11122777611017227,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008280793204903603,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008280793204903603,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.08280792832374573,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.11122777611017227,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008280793204903603,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008280793204903603,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.08280792832374573,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.11122777611017227,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008280793204903603,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008280793204903603,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.08280792832374573,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.11122777611017227,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008280793204903603,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008280793204903603,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.08280792832374573,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.11122777611017227,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008280793204903603,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008280793204903603,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.08280792832374573,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.11122777611017227,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008280793204903603,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008280793204903603,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.08280792832374573,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.11122777611017227,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008280793204903603,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008280793204903603,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18087565004825593,
|
|
"signal/accuracy_reward/group_std_mean": 0.24078828990459442,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.30555556118488314,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09043782502412796,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09043782502412796,
|
|
"signal/advantage_abs_mean": 0.10051447302103042,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10051447302103042,
|
|
"signal/advantage_pre_scale_std": 0.15557511448860167,
|
|
"signal/advantage_std": 0.15557511448860167,
|
|
"signal/brier_reward/centered_abs_mean": 0.14737005531787872,
|
|
"signal/brier_reward/group_std_mean": 0.1906406193971634,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014737005904316902,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014737005904316902,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03383201137185097,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.055407488346099855,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003383201127871871,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003383201127871871,
|
|
"signal/format_reward/centered_abs_mean": 0.02119140587747097,
|
|
"signal/format_reward/group_std_mean": 0.04045570828020573,
|
|
"signal/format_reward/group_zero_std_frac": 0.8277777791023254,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010595702938735485,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010595702938735485,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014987794915214182,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023568985518068073,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.873474338935921e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.873474338935921e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03193121068179607,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04668203741312027,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031931213103234767,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031931213103234767,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33423511385917665,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40302748084068296,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0334235105663538,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0334235105663538,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1659009748605335,
|
|
"calibration/batch_distribution_entropy": 0.9631188152950273,
|
|
"calibration/buffer_distribution_entropy": 0.8654873779220141,
|
|
"calibration/confidence_entropy": 0.47541938570574993,
|
|
"calibration/coverage@0%": 0.03987973405736563,
|
|
"calibration/coverage@1%": 0.03987973405736563,
|
|
"calibration/coverage@10%": 0.2952229479904432,
|
|
"calibration/coverage@15%": 0.5116889055028054,
|
|
"calibration/coverage@20%": 0.6731285252313406,
|
|
"calibration/coverage@25%": 0.7771573541029104,
|
|
"calibration/coverage@30%": 0.8733169729880256,
|
|
"calibration/coverage@5%": 0.16991762592040008,
|
|
"calibration/ece": 0.11026244108539315,
|
|
"calibration/mean_confidence": 0.5748356978844124,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009201388888888907,
|
|
"completions/max_length": 3788.2,
|
|
"completions/max_terminated_length": 3788.2,
|
|
"completions/mean_length": 749.9548583984375,
|
|
"completions/mean_terminated_length": 756.955517578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 251.6,
|
|
"epoch": 0.2039974500318746,
|
|
"grad_norm": 0.0003434408863540739,
|
|
"learning_rate": 3.7048192771084342e-06,
|
|
"loss": -0.0101,
|
|
"num_tokens": 175032880.0,
|
|
"reward": 0.9802459597587585,
|
|
"reward_std": 0.12824074923992157,
|
|
"rewards/accgated_coverage_0": 0.009267809754237532,
|
|
"rewards/accgated_coverage_1": 0.009267809754237532,
|
|
"rewards/accgated_coverage_10": 0.009267809754237532,
|
|
"rewards/accgated_coverage_15": 0.009267809754237532,
|
|
"rewards/accgated_coverage_20": 0.009267809754237532,
|
|
"rewards/accgated_coverage_25": 0.009267809754237532,
|
|
"rewards/accgated_coverage_5": 0.009267809754237532,
|
|
"rewards/accuracy_reward": 0.6739583373069763,
|
|
"rewards/brier_reward": 0.8075190901756286,
|
|
"rewards/confidence_uniqueness_reward": 0.9344035863876343,
|
|
"rewards/format_reward": 0.990711796283722,
|
|
"rewards/frontier_aurc_reward": -0.001392999361269176,
|
|
"rewards/frontier_ece_reward": 0.015480473451316357,
|
|
"rewards/frontier_entropy_batch_reward": -0.34299505352973936,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.07954660803079605,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.10623638331890106,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007954660896211862,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007954660896211862,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.07954660803079605,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.10623638331890106,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007954660896211862,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007954660896211862,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.07954660803079605,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.10623638331890106,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007954660896211862,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007954660896211862,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.07954660803079605,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.10623638331890106,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.007954660896211862,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.007954660896211862,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.07954660803079605,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.10623638331890106,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.007954660896211862,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.007954660896211862,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.07954660803079605,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.10623638331890106,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.007954660896211862,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.007954660896211862,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.07954660803079605,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.10623638331890106,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007954660896211862,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007954660896211862,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17490234673023225,
|
|
"signal/accuracy_reward/group_std_mean": 0.2357180804014206,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.30555556416511537,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08745117336511612,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08745117336511612,
|
|
"signal/advantage_abs_mean": 0.09189856797456741,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09189856797456741,
|
|
"signal/advantage_pre_scale_std": 0.14546703696250915,
|
|
"signal/advantage_std": 0.14546703696250915,
|
|
"signal/brier_reward/centered_abs_mean": 0.13718933761119842,
|
|
"signal/brier_reward/group_std_mean": 0.17911297082901,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013718934170901776,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013718934170901776,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.032110657170414926,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05388362035155296,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032110656145960094,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032110656145960094,
|
|
"signal/format_reward/centered_abs_mean": 0.01693250872194767,
|
|
"signal/format_reward/group_std_mean": 0.03573401048779488,
|
|
"signal/format_reward/group_zero_std_frac": 0.8416666626930237,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008466254360973835,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008466254360973835,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014202272053807975,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022525871871039273,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.775284035829827e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.775284035829827e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03186605237424374,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0464069627225399,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031866051722317934,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031866051722317934,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33895500302314757,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.410440057516098,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033895500004291534,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033895500004291534,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.12977931025427208,
|
|
"calibration/batch_distribution_entropy": 0.958446248326358,
|
|
"calibration/buffer_distribution_entropy": 0.8750197391454562,
|
|
"calibration/confidence_entropy": 0.47956886261730636,
|
|
"calibration/coverage@0%": 0.08705992500365711,
|
|
"calibration/coverage@1%": 0.09907036886788688,
|
|
"calibration/coverage@10%": 0.47727502918121056,
|
|
"calibration/coverage@15%": 0.6430634133674967,
|
|
"calibration/coverage@20%": 0.7598805387698602,
|
|
"calibration/coverage@25%": 0.8284771858836599,
|
|
"calibration/coverage@30%": 0.9102535133645633,
|
|
"calibration/coverage@5%": 0.3457808145752194,
|
|
"calibration/ece": 0.13963566186460488,
|
|
"calibration/mean_confidence": 0.5722278866912511,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009114583333333348,
|
|
"completions/max_length": 3512.2,
|
|
"completions/max_terminated_length": 3512.2,
|
|
"completions/mean_length": 737.26015625,
|
|
"completions/mean_terminated_length": 744.0468505859375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 217.8,
|
|
"epoch": 0.2159973000337496,
|
|
"grad_norm": 0.00042602099711075425,
|
|
"learning_rate": 3.5542168674698798e-06,
|
|
"loss": -0.0082,
|
|
"num_tokens": 186594789.0,
|
|
"reward": 0.981863534450531,
|
|
"reward_std": 0.12287124991416931,
|
|
"rewards/accgated_coverage_0": 0.005918828677386046,
|
|
"rewards/accgated_coverage_1": 0.005918828677386046,
|
|
"rewards/accgated_coverage_10": 0.005918828677386046,
|
|
"rewards/accgated_coverage_15": 0.005918828677386046,
|
|
"rewards/accgated_coverage_20": 0.005918828677386046,
|
|
"rewards/accgated_coverage_25": 0.005918828677386046,
|
|
"rewards/accgated_coverage_5": 0.005918828677386046,
|
|
"rewards/accuracy_reward": 0.668836796283722,
|
|
"rewards/brier_reward": 0.7972975611686707,
|
|
"rewards/confidence_uniqueness_reward": 0.9405464291572571,
|
|
"rewards/format_reward": 0.9907118082046509,
|
|
"rewards/frontier_aurc_reward": -0.001252932590432465,
|
|
"rewards/frontier_ece_reward": 0.013159998878836631,
|
|
"rewards/frontier_entropy_batch_reward": -0.27138724327087405,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.08956028670072555,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.11862562745809554,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008956028707325458,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008956028707325458,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.08956028670072555,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.11862562745809554,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008956028707325458,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008956028707325458,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.08956028670072555,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.11862562745809554,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008956028707325458,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008956028707325458,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.08956028670072555,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.11862562745809554,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008956028707325458,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008956028707325458,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.08956028670072555,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.11862562745809554,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008956028707325458,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008956028707325458,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.08956028670072555,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.11862562745809554,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008956028707325458,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008956028707325458,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.08956028670072555,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.11862562745809554,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008956028707325458,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008956028707325458,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17049153745174409,
|
|
"signal/accuracy_reward/group_std_mean": 0.22411769330501558,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08524576872587204,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08524576872587204,
|
|
"signal/advantage_abs_mean": 0.08913673162460327,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08913673162460327,
|
|
"signal/advantage_pre_scale_std": 0.14024181962013244,
|
|
"signal/advantage_std": 0.14024181962013244,
|
|
"signal/brier_reward/centered_abs_mean": 0.14370980560779573,
|
|
"signal/brier_reward/group_std_mean": 0.18593351542949677,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014370980486273766,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014370980486273766,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028378911688923834,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04979285299777984,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028378912713378666,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028378912713378666,
|
|
"signal/format_reward/centered_abs_mean": 0.01656358502805233,
|
|
"signal/format_reward/group_std_mean": 0.03597295694053173,
|
|
"signal/format_reward/group_zero_std_frac": 0.8333333492279053,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008281792514026165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008281792514026165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012377587612718345,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.001967509277164936,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5471983897441532e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5471983897441532e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03037920966744423,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.043885117024183275,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030379209667444227,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030379209667444227,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3221738815307617,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3930954456329346,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03221738450229168,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03221738450229168,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18050527287688292,
|
|
"calibration/batch_distribution_entropy": 0.9483888084359655,
|
|
"calibration/buffer_distribution_entropy": 0.8852656327097506,
|
|
"calibration/confidence_entropy": 0.4446431319687152,
|
|
"calibration/coverage@0%": 0.015151934406954707,
|
|
"calibration/coverage@1%": 0.015151934406954707,
|
|
"calibration/coverage@10%": 0.5202712232182465,
|
|
"calibration/coverage@15%": 0.5869756896666373,
|
|
"calibration/coverage@20%": 0.6468486084413063,
|
|
"calibration/coverage@25%": 0.6841459144943153,
|
|
"calibration/coverage@30%": 0.7356502203434319,
|
|
"calibration/coverage@5%": 0.22796958061671996,
|
|
"calibration/ece": 0.15324055615517662,
|
|
"calibration/mean_confidence": 0.5787011570773354,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006336805555555536,
|
|
"completions/max_length": 3316.2,
|
|
"completions/max_terminated_length": 3316.2,
|
|
"completions/mean_length": 738.9697265625,
|
|
"completions/mean_terminated_length": 743.6612426757813,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 208.2,
|
|
"epoch": 0.22799715003562457,
|
|
"grad_norm": 0.0003902704920619726,
|
|
"learning_rate": 3.4036144578313257e-06,
|
|
"loss": -0.0059,
|
|
"num_tokens": 198199400.0,
|
|
"reward": 0.977604615688324,
|
|
"reward_std": 0.12400392889976501,
|
|
"rewards/accgated_coverage_0": 0.013323387503623963,
|
|
"rewards/accgated_coverage_1": 0.013323387503623963,
|
|
"rewards/accgated_coverage_10": 0.013323387503623963,
|
|
"rewards/accgated_coverage_15": 0.013323387503623963,
|
|
"rewards/accgated_coverage_20": 0.013323387503623963,
|
|
"rewards/accgated_coverage_25": 0.013323387503623963,
|
|
"rewards/accgated_coverage_5": 0.013323387503623963,
|
|
"rewards/accuracy_reward": 0.6585069417953491,
|
|
"rewards/brier_reward": 0.807054877281189,
|
|
"rewards/confidence_uniqueness_reward": 0.9377588510513306,
|
|
"rewards/format_reward": 0.9935763955116272,
|
|
"rewards/frontier_aurc_reward": -0.001382858515717089,
|
|
"rewards/frontier_ece_reward": 0.015744138136506082,
|
|
"rewards/frontier_entropy_batch_reward": -0.3380190432071686,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.07688793241977691,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.10520303398370742,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007688793074339628,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007688793074339628,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.07688793241977691,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.10520303398370742,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007688793074339628,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007688793074339628,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.07688793241977691,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.10520303398370742,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007688793074339628,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007688793074339628,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.07688793241977691,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.10520303398370742,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.007688793074339628,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.007688793074339628,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.07688793241977691,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.10520303398370742,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.007688793074339628,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.007688793074339628,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.07688793241977691,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.10520303398370742,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.007688793074339628,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.007688793074339628,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.07688793241977691,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.10520303398370742,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007688793074339628,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007688793074339628,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16501736044883727,
|
|
"signal/accuracy_reward/group_std_mean": 0.22163840532302856,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3666666686534882,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08250868022441864,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08250868022441864,
|
|
"signal/advantage_abs_mean": 0.09041554778814316,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09041554778814316,
|
|
"signal/advantage_pre_scale_std": 0.142058926820755,
|
|
"signal/advantage_std": 0.142058926820755,
|
|
"signal/brier_reward/centered_abs_mean": 0.13740523755550385,
|
|
"signal/brier_reward/group_std_mean": 0.1825299233198166,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013740524649620056,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013740524649620056,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027925553545355798,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04514765739440918,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002792555373162031,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002792555373162031,
|
|
"signal/format_reward/centered_abs_mean": 0.011762152798473835,
|
|
"signal/format_reward/group_std_mean": 0.025851282477378845,
|
|
"signal/format_reward/group_zero_std_frac": 0.8805555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005881076399236918,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005881076399236918,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015603850362822414,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0025688192108646035,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.950481346284505e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.950481346284505e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03039446845650673,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04404748827219009,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00303944693878293,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00303944693878293,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35081249475479126,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.42071945071220396,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035081248730421066,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035081248730421066,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.14878210287435184,
|
|
"calibration/batch_distribution_entropy": 0.9685480405199234,
|
|
"calibration/buffer_distribution_entropy": 0.8919329426013801,
|
|
"calibration/confidence_entropy": 0.5030879963092827,
|
|
"calibration/coverage@0%": 0.050392448689359805,
|
|
"calibration/coverage@1%": 0.050392448689359805,
|
|
"calibration/coverage@10%": 0.29977935142486195,
|
|
"calibration/coverage@15%": 0.6217676940395167,
|
|
"calibration/coverage@20%": 0.7900059832613835,
|
|
"calibration/coverage@25%": 0.8822578230367519,
|
|
"calibration/coverage@30%": 0.9150943263459359,
|
|
"calibration/coverage@5%": 0.1633913711877651,
|
|
"calibration/ece": 0.15545339149716966,
|
|
"calibration/mean_confidence": 0.557937458366312,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.008940972222222232,
|
|
"completions/max_length": 3830.8,
|
|
"completions/max_terminated_length": 3830.8,
|
|
"completions/mean_length": 756.8622436523438,
|
|
"completions/mean_terminated_length": 763.7328247070312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 223.2,
|
|
"epoch": 0.23999700003749952,
|
|
"grad_norm": 0.0003215717733837664,
|
|
"learning_rate": 3.2530120481927713e-06,
|
|
"loss": -0.0086,
|
|
"num_tokens": 210017525.0,
|
|
"reward": 0.98323655128479,
|
|
"reward_std": 0.12426994442939758,
|
|
"rewards/accgated_coverage_0": 0.012122076144441963,
|
|
"rewards/accgated_coverage_1": 0.012122076144441963,
|
|
"rewards/accgated_coverage_10": 0.012122076144441963,
|
|
"rewards/accgated_coverage_15": 0.012122076144441963,
|
|
"rewards/accgated_coverage_20": 0.012122076144441963,
|
|
"rewards/accgated_coverage_25": 0.012122076144441963,
|
|
"rewards/accgated_coverage_5": 0.012122076144441963,
|
|
"rewards/accuracy_reward": 0.6693576455116272,
|
|
"rewards/brier_reward": 0.8081021189689637,
|
|
"rewards/confidence_uniqueness_reward": 0.9393685340881348,
|
|
"rewards/format_reward": 0.9909722208976746,
|
|
"rewards/frontier_aurc_reward": -0.0011498184525407852,
|
|
"rewards/frontier_ece_reward": 0.01175499688833952,
|
|
"rewards/frontier_entropy_batch_reward": -0.3132203757762909,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.0879080355167389,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.11624083817005157,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008790803793817758,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008790803793817758,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.0879080355167389,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.11624083817005157,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008790803793817758,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008790803793817758,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.0879080355167389,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.11624083817005157,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008790803793817758,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008790803793817758,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.0879080355167389,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.11624083817005157,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008790803793817758,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008790803793817758,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.0879080355167389,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.11624083817005157,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008790803793817758,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008790803793817758,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.0879080355167389,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.11624083817005157,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008790803793817758,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008790803793817758,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.0879080355167389,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.11624083817005157,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008790803793817758,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008790803793817758,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17026366889476777,
|
|
"signal/accuracy_reward/group_std_mean": 0.22329876124858855,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.36944444179534913,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08513183444738388,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08513183444738388,
|
|
"signal/advantage_abs_mean": 0.09176785200834274,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09176785200834274,
|
|
"signal/advantage_pre_scale_std": 0.14243557453155517,
|
|
"signal/advantage_std": 0.14243557453155517,
|
|
"signal/brier_reward/centered_abs_mean": 0.13790313005447388,
|
|
"signal/brier_reward/group_std_mean": 0.1790826916694641,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013790314458310604,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013790314458310604,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028661540523171426,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04833717867732048,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028661541175097225,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028661541175097225,
|
|
"signal/format_reward/centered_abs_mean": 0.016037326119840146,
|
|
"signal/format_reward/group_std_mean": 0.033191120624542235,
|
|
"signal/format_reward/group_zero_std_frac": 0.8527777791023254,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008018663059920073,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008018663059920073,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0010956939542666078,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.001799008040688932,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.369617530144751e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.369617530144751e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.025799740105867386,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.037496446073055266,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002579974289983511,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002579974289983511,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3498570203781128,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.417994225025177,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03498570322990417,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03498570322990417,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.23999700003749952,
|
|
"eval_calibration/aurc": 0.1517072771943686,
|
|
"eval_calibration/batch_distribution_entropy": 0.8719902280793708,
|
|
"eval_calibration/buffer_distribution_entropy": 0.8971747945256113,
|
|
"eval_calibration/confidence_entropy": 0.43289156185263855,
|
|
"eval_calibration/coverage@0%": 0.234375,
|
|
"eval_calibration/coverage@1%": 0.234375,
|
|
"eval_calibration/coverage@10%": 0.4427083333333333,
|
|
"eval_calibration/coverage@15%": 0.625,
|
|
"eval_calibration/coverage@20%": 0.75,
|
|
"eval_calibration/coverage@25%": 0.8854166666666666,
|
|
"eval_calibration/coverage@30%": 0.9322916666666666,
|
|
"eval_calibration/coverage@5%": 0.2708333333333333,
|
|
"eval_calibration/ece": 0.17274301417681212,
|
|
"eval_calibration/mean_confidence": 0.6774464357123575,
|
|
"eval_completions/clipped_ratio": 0.005208333333333333,
|
|
"eval_completions/max_length": 2554.0,
|
|
"eval_completions/max_terminated_length": 2554.0,
|
|
"eval_completions/mean_length": 727.868418375651,
|
|
"eval_completions/mean_terminated_length": 731.6888529459635,
|
|
"eval_completions/min_length": 95.16666666666667,
|
|
"eval_completions/min_terminated_length": 270.8333333333333,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 210017525.0,
|
|
"eval_reward": 0.9147416253884634,
|
|
"eval_reward_std": 0.2155863419175148,
|
|
"eval_rewards/accgated_coverage_0": 0.02142564587605496,
|
|
"eval_rewards/accgated_coverage_1": 0.02142564587605496,
|
|
"eval_rewards/accgated_coverage_10": 0.02142564587605496,
|
|
"eval_rewards/accgated_coverage_15": 0.02142564587605496,
|
|
"eval_rewards/accgated_coverage_20": 0.02142564587605496,
|
|
"eval_rewards/accgated_coverage_25": 0.02142564587605496,
|
|
"eval_rewards/accgated_coverage_5": 0.02142564587605496,
|
|
"eval_rewards/accuracy_reward": 0.6623263855775198,
|
|
"eval_rewards/brier_reward": 0.821544220050176,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8742929995059967,
|
|
"eval_rewards/format_reward": 0.9930555522441864,
|
|
"eval_rewards/frontier_aurc_reward": -0.0016115416074171662,
|
|
"eval_rewards/frontier_ece_reward": 0.017946766689419746,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9930555522441864,
|
|
"eval_runtime": 198.4437,
|
|
"eval_samples_per_second": 5.039,
|
|
"eval_signal/accgated_coverage_0/centered_abs_mean": 0.09550586342811584,
|
|
"eval_signal/accgated_coverage_0/group_std_mean": 0.14860529700915018,
|
|
"eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.009550586187591156,
|
|
"eval_signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.009550586187591156,
|
|
"eval_signal/accgated_coverage_1/centered_abs_mean": 0.09550586342811584,
|
|
"eval_signal/accgated_coverage_1/group_std_mean": 0.14860529700915018,
|
|
"eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.009550586187591156,
|
|
"eval_signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.009550586187591156,
|
|
"eval_signal/accgated_coverage_10/centered_abs_mean": 0.09550586342811584,
|
|
"eval_signal/accgated_coverage_10/group_std_mean": 0.14860529700915018,
|
|
"eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.009550586187591156,
|
|
"eval_signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.009550586187591156,
|
|
"eval_signal/accgated_coverage_15/centered_abs_mean": 0.09550586342811584,
|
|
"eval_signal/accgated_coverage_15/group_std_mean": 0.14860529700915018,
|
|
"eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.009550586187591156,
|
|
"eval_signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.009550586187591156,
|
|
"eval_signal/accgated_coverage_20/centered_abs_mean": 0.09550586342811584,
|
|
"eval_signal/accgated_coverage_20/group_std_mean": 0.14860529700915018,
|
|
"eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.009550586187591156,
|
|
"eval_signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.009550586187591156,
|
|
"eval_signal/accgated_coverage_25/centered_abs_mean": 0.09550586342811584,
|
|
"eval_signal/accgated_coverage_25/group_std_mean": 0.14860529700915018,
|
|
"eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.009550586187591156,
|
|
"eval_signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.009550586187591156,
|
|
"eval_signal/accgated_coverage_5/centered_abs_mean": 0.09550586342811584,
|
|
"eval_signal/accgated_coverage_5/group_std_mean": 0.14860529700915018,
|
|
"eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.009550586187591156,
|
|
"eval_signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.009550586187591156,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.43505859375,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.472920889655749,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.217529296875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.217529296875,
|
|
"eval_signal/advantage_abs_mean": 0.18196682880322138,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.18196682880322138,
|
|
"eval_signal/advantage_pre_scale_std": 0.21450272450844446,
|
|
"eval_signal/advantage_std": 0.21450272450844446,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.19742226352294287,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2611571674545606,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01974222684899966,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01974222684899966,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.056711938232183456,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08472888544201851,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00567119390082856,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00567119390082856,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.013454860697189966,
|
|
"eval_signal/format_reward/group_std_mean": 0.03928371022144953,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.7777778009573618,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.006727430348594983,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.006727430348594983,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0027537442122896514,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004981053527444601,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.442180301741852e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.442180301741852e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.03306501638144255,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.05123907576004664,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0033065018554528556,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0033065018554528556,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.013454860697189966,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.03928371022144953,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7777778009573618,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0013454861279266577,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0013454861279266577,
|
|
"eval_steps_per_second": 0.03,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27191994998532804,
|
|
"calibration/batch_distribution_entropy": 0.9110664528383987,
|
|
"calibration/buffer_distribution_entropy": 0.8976974161527924,
|
|
"calibration/confidence_entropy": 0.4418744288231554,
|
|
"calibration/coverage@0%": 0.07043912039759519,
|
|
"calibration/coverage@1%": 0.0741428241012989,
|
|
"calibration/coverage@10%": 0.20709950140523467,
|
|
"calibration/coverage@15%": 0.2794609689211093,
|
|
"calibration/coverage@20%": 0.3744648370189722,
|
|
"calibration/coverage@25%": 0.44615183007087034,
|
|
"calibration/coverage@30%": 0.5388811184790099,
|
|
"calibration/coverage@5%": 0.15804877406469012,
|
|
"calibration/ece": 0.14876870881845822,
|
|
"calibration/mean_confidence": 0.6542153198229441,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012760416666666653,
|
|
"completions/max_length": 3785.4,
|
|
"completions/max_terminated_length": 3785.4,
|
|
"completions/mean_length": 725.8693725585938,
|
|
"completions/mean_terminated_length": 735.1307983398438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 203.8,
|
|
"epoch": 0.2519968500393745,
|
|
"grad_norm": 0.0007082828669808805,
|
|
"learning_rate": 3.1024096385542172e-06,
|
|
"loss": -0.0143,
|
|
"num_tokens": 221456404.0,
|
|
"reward": 0.9740729570388794,
|
|
"reward_std": 0.1306391790509224,
|
|
"rewards/accgated_coverage_0": 0.01679096817970276,
|
|
"rewards/accgated_coverage_1": 0.01679096817970276,
|
|
"rewards/accgated_coverage_10": 0.01679096817970276,
|
|
"rewards/accgated_coverage_15": 0.01679096817970276,
|
|
"rewards/accgated_coverage_20": 0.01679096817970276,
|
|
"rewards/accgated_coverage_25": 0.01679096817970276,
|
|
"rewards/accgated_coverage_5": 0.01679096817970276,
|
|
"rewards/accuracy_reward": 0.6658854126930237,
|
|
"rewards/brier_reward": 0.8182917714118958,
|
|
"rewards/confidence_uniqueness_reward": 0.92508784532547,
|
|
"rewards/format_reward": 0.9872395753860473,
|
|
"rewards/frontier_aurc_reward": -0.0014265108155086636,
|
|
"rewards/frontier_ece_reward": 0.017224435694515705,
|
|
"rewards/frontier_entropy_batch_reward": -0.40285754203796387,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.06954341232776642,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.09541115164756775,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.006954341474920511,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.006954341474920511,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.06954341232776642,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.09541115164756775,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.006954341474920511,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.006954341474920511,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.06954341232776642,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.09541115164756775,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.006954341474920511,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.006954341474920511,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.06954341232776642,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.09541115164756775,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006954341474920511,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006954341474920511,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.06954341232776642,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.09541115164756775,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.006954341474920511,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.006954341474920511,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.06954341232776642,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.09541115164756775,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.006954341474920511,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.006954341474920511,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.06954341232776642,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.09541115164756775,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.006954341474920511,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.006954341474920511,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16107313334941864,
|
|
"signal/accuracy_reward/group_std_mean": 0.21490317285060884,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38888888955116274,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08053656667470932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08053656667470932,
|
|
"signal/advantage_abs_mean": 0.0953233152627945,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0953233152627945,
|
|
"signal/advantage_pre_scale_std": 0.15531104803085327,
|
|
"signal/advantage_std": 0.15531104803085327,
|
|
"signal/brier_reward/centered_abs_mean": 0.1313171371817589,
|
|
"signal/brier_reward/group_std_mean": 0.1736193746328354,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013131714053452015,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013131714053452015,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03815034255385399,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06041007116436958,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038150343578308822,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038150343578308822,
|
|
"signal/format_reward/centered_abs_mean": 0.021175130270421505,
|
|
"signal/format_reward/group_std_mean": 0.04042814746499061,
|
|
"signal/format_reward/group_zero_std_frac": 0.8333333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010587565135210752,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010587565135210752,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001724409847520292,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002808917826041579,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1555123385041953e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1555123385041953e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.029964320734143256,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04247441366314888,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002996431989595294,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002996431989595294,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3408441662788391,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4115506410598755,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03408441767096519,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03408441767096519,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.12744663592497546,
|
|
"calibration/batch_distribution_entropy": 0.9519339153112962,
|
|
"calibration/buffer_distribution_entropy": 0.9010714042039174,
|
|
"calibration/confidence_entropy": 0.4827597225891366,
|
|
"calibration/coverage@0%": 0.11921970763456483,
|
|
"calibration/coverage@1%": 0.11921970763456483,
|
|
"calibration/coverage@10%": 0.4370017656603837,
|
|
"calibration/coverage@15%": 0.6632638952974681,
|
|
"calibration/coverage@20%": 0.7718870891274262,
|
|
"calibration/coverage@25%": 0.8532847355064792,
|
|
"calibration/coverage@30%": 0.9258258532038015,
|
|
"calibration/coverage@5%": 0.29218016594856466,
|
|
"calibration/ece": 0.13218755371540838,
|
|
"calibration/mean_confidence": 0.5731048630256981,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011024305555555558,
|
|
"completions/max_length": 3485.8,
|
|
"completions/max_terminated_length": 3485.8,
|
|
"completions/mean_length": 725.3192749023438,
|
|
"completions/mean_terminated_length": 733.386181640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 225.2,
|
|
"epoch": 0.2639967000412495,
|
|
"grad_norm": 0.0004119804943911731,
|
|
"learning_rate": 2.9518072289156627e-06,
|
|
"loss": -0.0112,
|
|
"num_tokens": 232920530.0,
|
|
"reward": 0.9900946617126465,
|
|
"reward_std": 0.1217676728963852,
|
|
"rewards/accgated_coverage_0": 0.005296125635504722,
|
|
"rewards/accgated_coverage_1": 0.005296125635504722,
|
|
"rewards/accgated_coverage_10": 0.005296125635504722,
|
|
"rewards/accgated_coverage_15": 0.005296125635504722,
|
|
"rewards/accgated_coverage_20": 0.005296125635504722,
|
|
"rewards/accgated_coverage_25": 0.005296125635504722,
|
|
"rewards/accgated_coverage_5": 0.005296125635504722,
|
|
"rewards/accuracy_reward": 0.6916666626930237,
|
|
"rewards/brier_reward": 0.8105852246284485,
|
|
"rewards/confidence_uniqueness_reward": 0.9374894022941589,
|
|
"rewards/format_reward": 0.9888888955116272,
|
|
"rewards/frontier_aurc_reward": -0.0009289152920246124,
|
|
"rewards/frontier_ece_reward": 0.009040744509547949,
|
|
"rewards/frontier_entropy_batch_reward": -0.2959034085273743,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.09564257562160491,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.12577018290758132,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.009564257692545652,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.009564257692545652,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.09564257562160491,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.12577018290758132,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.009564257692545652,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.009564257692545652,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.09564257562160491,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.12577018290758132,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.009564257692545652,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.009564257692545652,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.09564257562160491,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.12577018290758132,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.009564257692545652,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.009564257692545652,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.09564257562160491,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.12577018290758132,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.009564257692545652,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.009564257692545652,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.09564257562160491,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.12577018290758132,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.009564257692545652,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.009564257692545652,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.09564257562160491,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.12577018290758132,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.009564257692545652,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.009564257692545652,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17435981035232545,
|
|
"signal/accuracy_reward/group_std_mean": 0.23566536605358124,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3083333343267441,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08717990517616273,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08717990517616273,
|
|
"signal/advantage_abs_mean": 0.08975694328546524,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08975694328546524,
|
|
"signal/advantage_pre_scale_std": 0.1408482313156128,
|
|
"signal/advantage_std": 0.1408482313156128,
|
|
"signal/brier_reward/centered_abs_mean": 0.1274586006999016,
|
|
"signal/brier_reward/group_std_mean": 0.16526381373405458,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012745860032737255,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012745860032737255,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03015372306108475,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04796536043286324,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030153723899275066,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030153723899275066,
|
|
"signal/format_reward/centered_abs_mean": 0.01713324636220932,
|
|
"signal/format_reward/group_std_mean": 0.03228283934295177,
|
|
"signal/format_reward/group_zero_std_frac": 0.8638888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00856662318110466,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00856662318110466,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0008368753246031701,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0013955856789834797,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.046094139383058e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.046094139383058e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.023550980538129807,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.033878795057535174,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002355098072439432,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002355098072439432,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3411864399909973,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41080782413482664,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034118644148111346,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034118644148111346,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2565453334486796,
|
|
"calibration/batch_distribution_entropy": 0.9575537318941236,
|
|
"calibration/buffer_distribution_entropy": 0.9060671449072653,
|
|
"calibration/confidence_entropy": 0.46890768621541457,
|
|
"calibration/coverage@0%": 0.019065294749141152,
|
|
"calibration/coverage@1%": 0.019065294749141152,
|
|
"calibration/coverage@10%": 0.15182530570872965,
|
|
"calibration/coverage@15%": 0.33773805086252534,
|
|
"calibration/coverage@20%": 0.50743781469926,
|
|
"calibration/coverage@25%": 0.6094538999487293,
|
|
"calibration/coverage@30%": 0.6899513244607858,
|
|
"calibration/coverage@5%": 0.052038626270536105,
|
|
"calibration/ece": 0.16751469598705682,
|
|
"calibration/mean_confidence": 0.5752961614557014,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013888888888888885,
|
|
"completions/max_length": 3733.2,
|
|
"completions/max_terminated_length": 3733.2,
|
|
"completions/mean_length": 734.7553833007812,
|
|
"completions/mean_terminated_length": 745.2282348632813,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 218.4,
|
|
"epoch": 0.27599655004312446,
|
|
"grad_norm": 0.00032408704282715917,
|
|
"learning_rate": 2.8012048192771087e-06,
|
|
"loss": -0.0141,
|
|
"num_tokens": 244464112.0,
|
|
"reward": 0.973526930809021,
|
|
"reward_std": 0.12945592552423477,
|
|
"rewards/accgated_coverage_0": 0.019186272844672204,
|
|
"rewards/accgated_coverage_1": 0.019186272844672204,
|
|
"rewards/accgated_coverage_10": 0.019186272844672204,
|
|
"rewards/accgated_coverage_15": 0.019186272844672204,
|
|
"rewards/accgated_coverage_20": 0.019186272844672204,
|
|
"rewards/accgated_coverage_25": 0.019186272844672204,
|
|
"rewards/accgated_coverage_5": 0.019186272844672204,
|
|
"rewards/accuracy_reward": 0.6506944537162781,
|
|
"rewards/brier_reward": 0.8081199884414673,
|
|
"rewards/confidence_uniqueness_reward": 0.9315792918205261,
|
|
"rewards/format_reward": 0.9860243082046509,
|
|
"rewards/frontier_aurc_reward": -0.0013923029648140073,
|
|
"rewards/frontier_ece_reward": 0.013070075027644634,
|
|
"rewards/frontier_entropy_batch_reward": -0.3352237045764923,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.08040238320827484,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.10813496261835098,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008040238078683615,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008040238078683615,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.08040238320827484,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.10813496261835098,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008040238078683615,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008040238078683615,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.08040238320827484,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.10813496261835098,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008040238078683615,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008040238078683615,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.08040238320827484,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.10813496261835098,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008040238078683615,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008040238078683615,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.08040238320827484,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.10813496261835098,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008040238078683615,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008040238078683615,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.08040238320827484,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.10813496261835098,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008040238078683615,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008040238078683615,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.08040238320827484,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.10813496261835098,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008040238078683615,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008040238078683615,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1705186665058136,
|
|
"signal/accuracy_reward/group_std_mean": 0.22422258257865907,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3638888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0852593332529068,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0852593332529068,
|
|
"signal/advantage_abs_mean": 0.09446865022182464,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09446865022182464,
|
|
"signal/advantage_pre_scale_std": 0.15316152572631836,
|
|
"signal/advantage_std": 0.15316152572631836,
|
|
"signal/brier_reward/centered_abs_mean": 0.140705406665802,
|
|
"signal/brier_reward/group_std_mean": 0.1837473154067993,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01407054141163826,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01407054141163826,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03692381903529167,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.060953890532255174,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036923819687217476,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036923819687217476,
|
|
"signal/format_reward/centered_abs_mean": 0.02344292551279068,
|
|
"signal/format_reward/group_std_mean": 0.045116296410560607,
|
|
"signal/format_reward/group_zero_std_frac": 0.8111111044883728,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01172146275639534,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01172146275639534,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015263660810887814,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002488213311880827,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.907957521325443e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.907957521325443e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02705363780260086,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03860641121864319,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027053637430071832,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027053637430071832,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34538384675979616,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41308689713478086,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03453838601708412,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03453838601708412,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24837072760216533,
|
|
"calibration/batch_distribution_entropy": 0.9368503432052853,
|
|
"calibration/buffer_distribution_entropy": 0.9108657331287386,
|
|
"calibration/confidence_entropy": 0.4455018146660613,
|
|
"calibration/coverage@0%": 0.04905653133468686,
|
|
"calibration/coverage@1%": 0.04905653133468686,
|
|
"calibration/coverage@10%": 0.3206688968562649,
|
|
"calibration/coverage@15%": 0.42620434852377453,
|
|
"calibration/coverage@20%": 0.5009301871200499,
|
|
"calibration/coverage@25%": 0.543208919992705,
|
|
"calibration/coverage@30%": 0.5796971706454465,
|
|
"calibration/coverage@5%": 0.18065346401915186,
|
|
"calibration/ece": 0.15013938661825454,
|
|
"calibration/mean_confidence": 0.6012813334516616,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009201388888888907,
|
|
"completions/max_length": 3410.2,
|
|
"completions/max_terminated_length": 3410.2,
|
|
"completions/mean_length": 718.92509765625,
|
|
"completions/mean_terminated_length": 725.5808715820312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 233.2,
|
|
"epoch": 0.28799640004499943,
|
|
"grad_norm": 0.0007875562296248972,
|
|
"learning_rate": 2.6506024096385547e-06,
|
|
"loss": -0.009,
|
|
"num_tokens": 255827985.0,
|
|
"reward": 0.9858316063880921,
|
|
"reward_std": 0.1202399954199791,
|
|
"rewards/accgated_coverage_0": 0.017084382567554714,
|
|
"rewards/accgated_coverage_1": 0.017084382567554714,
|
|
"rewards/accgated_coverage_10": 0.017084382567554714,
|
|
"rewards/accgated_coverage_15": 0.017084382567554714,
|
|
"rewards/accgated_coverage_20": 0.017084382567554714,
|
|
"rewards/accgated_coverage_25": 0.017084382567554714,
|
|
"rewards/accgated_coverage_5": 0.017084382567554714,
|
|
"rewards/accuracy_reward": 0.6697048544883728,
|
|
"rewards/brier_reward": 0.8128461956977844,
|
|
"rewards/confidence_uniqueness_reward": 0.9363534331321717,
|
|
"rewards/format_reward": 0.9907986044883728,
|
|
"rewards/frontier_aurc_reward": -0.0012421533348970116,
|
|
"rewards/frontier_ece_reward": 0.012260660342872143,
|
|
"rewards/frontier_entropy_batch_reward": -0.32509719729423525,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.08290913850069045,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.11078527420759202,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008290913514792919,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008290913514792919,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.08290913850069045,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.11078527420759202,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008290913514792919,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008290913514792919,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.08290913850069045,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.11078527420759202,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008290913514792919,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008290913514792919,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.08290913850069045,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.11078527420759202,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008290913514792919,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008290913514792919,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.08290913850069045,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.11078527420759202,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008290913514792919,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008290913514792919,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.08290913850069045,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.11078527420759202,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008290913514792919,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008290913514792919,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.08290913850069045,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.11078527420759202,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008290913514792919,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008290913514792919,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16545681655406952,
|
|
"signal/accuracy_reward/group_std_mean": 0.22034453451633454,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.37222222685813905,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08272840827703476,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08272840827703476,
|
|
"signal/advantage_abs_mean": 0.0883379727602005,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0883379727602005,
|
|
"signal/advantage_pre_scale_std": 0.14179351627826692,
|
|
"signal/advantage_std": 0.14179351627826692,
|
|
"signal/brier_reward/centered_abs_mean": 0.1315285176038742,
|
|
"signal/brier_reward/group_std_mean": 0.1732201546430588,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013152851909399032,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013152851909399032,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030239152908325195,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.048340915143489836,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030239153653383254,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030239153653383254,
|
|
"signal/format_reward/centered_abs_mean": 0.01531032994389534,
|
|
"signal/format_reward/group_std_mean": 0.03024934194982052,
|
|
"signal/format_reward/group_zero_std_frac": 0.8694444417953491,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00765516497194767,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00765516497194767,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012988673988729715,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002020396827720106,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6235843395406847e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6235843395406847e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02591995447874069,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.037436506152153014,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002591995522379875,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002591995522379875,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.343667733669281,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41231095790863037,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034366774559021,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034366774559021,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16554464244255415,
|
|
"calibration/batch_distribution_entropy": 0.9212177521245579,
|
|
"calibration/buffer_distribution_entropy": 0.9138018599484876,
|
|
"calibration/confidence_entropy": 0.4598320839958225,
|
|
"calibration/coverage@0%": 0.031985780423280416,
|
|
"calibration/coverage@1%": 0.031985780423280416,
|
|
"calibration/coverage@10%": 0.3589368386243386,
|
|
"calibration/coverage@15%": 0.4688489466467215,
|
|
"calibration/coverage@20%": 0.5399296898806061,
|
|
"calibration/coverage@25%": 0.8059401316518464,
|
|
"calibration/coverage@30%": 0.8851748836532867,
|
|
"calibration/coverage@5%": 0.30297619047619045,
|
|
"calibration/ece": 0.11906561022394982,
|
|
"calibration/mean_confidence": 0.6288702607416632,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006597222222222232,
|
|
"completions/max_length": 3522.8,
|
|
"completions/max_terminated_length": 3522.8,
|
|
"completions/mean_length": 714.220751953125,
|
|
"completions/mean_terminated_length": 718.9944458007812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 212.6,
|
|
"epoch": 0.2999962500468744,
|
|
"grad_norm": 0.0005939038819633424,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": -0.0071,
|
|
"num_tokens": 267173472.0,
|
|
"reward": 0.9884174346923829,
|
|
"reward_std": 0.11844182759523392,
|
|
"rewards/accgated_coverage_0": 0.01884926073253155,
|
|
"rewards/accgated_coverage_1": 0.01884926073253155,
|
|
"rewards/accgated_coverage_10": 0.01884926073253155,
|
|
"rewards/accgated_coverage_15": 0.01884926073253155,
|
|
"rewards/accgated_coverage_20": 0.01884926073253155,
|
|
"rewards/accgated_coverage_25": 0.01884926073253155,
|
|
"rewards/accgated_coverage_5": 0.01884926073253155,
|
|
"rewards/accuracy_reward": 0.6723090291023255,
|
|
"rewards/brier_reward": 0.8303997039794921,
|
|
"rewards/confidence_uniqueness_reward": 0.9367879986763,
|
|
"rewards/format_reward": 0.9934027791023254,
|
|
"rewards/frontier_aurc_reward": -0.0010956356301903725,
|
|
"rewards/frontier_ece_reward": 0.012315450236201286,
|
|
"rewards/frontier_entropy_batch_reward": -0.35569567084312437,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.07545997649431228,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.10163306593894958,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00754599766805768,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00754599766805768,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.07545997649431228,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.10163306593894958,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00754599766805768,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00754599766805768,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.07545997649431228,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.10163306593894958,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00754599766805768,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00754599766805768,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.07545997649431228,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.10163306593894958,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00754599766805768,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00754599766805768,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.07545997649431228,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.10163306593894958,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.00754599766805768,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.00754599766805768,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.07545997649431228,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.10163306593894958,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.00754599766805768,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.00754599766805768,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.07545997649431228,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.10163306593894958,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00754599766805768,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00754599766805768,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16522895097732543,
|
|
"signal/accuracy_reward/group_std_mean": 0.22105098962783815,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3583333373069763,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08261447548866271,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08261447548866271,
|
|
"signal/advantage_abs_mean": 0.08631904572248458,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08631904572248458,
|
|
"signal/advantage_pre_scale_std": 0.13653431832790375,
|
|
"signal/advantage_std": 0.13653431832790375,
|
|
"signal/brier_reward/centered_abs_mean": 0.12054249793291091,
|
|
"signal/brier_reward/group_std_mean": 0.1602459281682968,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01205424964427948,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01205424964427948,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027296838536858558,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04333715438842774,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027296837884932756,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027296837884932756,
|
|
"signal/format_reward/centered_abs_mean": 0.011990017350763082,
|
|
"signal/format_reward/group_std_mean": 0.02509169690310955,
|
|
"signal/format_reward/group_zero_std_frac": 0.8888889074325561,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005995008675381541,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005995008675381541,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012402797234244644,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0020731140859425066,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5503496979363262e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5503496979363262e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.023925036564469336,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03440321609377861,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023925038054585456,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023925038054585456,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3451420783996582,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4122345566749573,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034514208883047105,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034514208883047105,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21284186071868588,
|
|
"calibration/batch_distribution_entropy": 0.9339286249438772,
|
|
"calibration/buffer_distribution_entropy": 0.9148087399287421,
|
|
"calibration/confidence_entropy": 0.45447642600743754,
|
|
"calibration/coverage@0%": 0.03196024284568126,
|
|
"calibration/coverage@1%": 0.03196024284568126,
|
|
"calibration/coverage@10%": 0.21866415120586336,
|
|
"calibration/coverage@15%": 0.34623181344678866,
|
|
"calibration/coverage@20%": 0.439894685215483,
|
|
"calibration/coverage@25%": 0.7419993547570419,
|
|
"calibration/coverage@30%": 0.8557840171963754,
|
|
"calibration/coverage@5%": 0.1316335258728029,
|
|
"calibration/ece": 0.1146119336676791,
|
|
"calibration/mean_confidence": 0.5895418090310146,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013107638888888884,
|
|
"completions/max_length": 3611.6,
|
|
"completions/max_terminated_length": 3611.6,
|
|
"completions/mean_length": 745.3184204101562,
|
|
"completions/mean_terminated_length": 755.2761962890625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 211.4,
|
|
"epoch": 0.3119961000487494,
|
|
"grad_norm": 0.0004161697579547763,
|
|
"learning_rate": 2.349397590361446e-06,
|
|
"loss": -0.0135,
|
|
"num_tokens": 278884340.0,
|
|
"reward": 0.9715276241302491,
|
|
"reward_std": 0.1289304807782173,
|
|
"rewards/accgated_coverage_0": 0.020680619217455388,
|
|
"rewards/accgated_coverage_1": 0.020680619217455388,
|
|
"rewards/accgated_coverage_10": 0.020680619217455388,
|
|
"rewards/accgated_coverage_15": 0.020680619217455388,
|
|
"rewards/accgated_coverage_20": 0.020680619217455388,
|
|
"rewards/accgated_coverage_25": 0.020680619217455388,
|
|
"rewards/accgated_coverage_5": 0.020680619217455388,
|
|
"rewards/accuracy_reward": 0.6413194537162781,
|
|
"rewards/brier_reward": 0.8110544085502625,
|
|
"rewards/confidence_uniqueness_reward": 0.9331743836402893,
|
|
"rewards/format_reward": 0.98671875,
|
|
"rewards/frontier_aurc_reward": -0.0013186614960432053,
|
|
"rewards/frontier_ece_reward": 0.010826228372752666,
|
|
"rewards/frontier_entropy_batch_reward": -0.3245693564414978,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.0793161392211914,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.10691076219081878,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007931614108383656,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007931614108383656,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.0793161392211914,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.10691076219081878,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007931614108383656,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007931614108383656,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.0793161392211914,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.10691076219081878,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007931614108383656,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007931614108383656,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.0793161392211914,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.10691076219081878,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.007931614108383656,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.007931614108383656,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.0793161392211914,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.10691076219081878,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.007931614108383656,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.007931614108383656,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.0793161392211914,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.10691076219081878,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.007931614108383656,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.007931614108383656,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.0793161392211914,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.10691076219081878,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007931614108383656,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007931614108383656,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18987630307674408,
|
|
"signal/accuracy_reward/group_std_mean": 0.2467512845993042,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.30555556416511537,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09493815153837204,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09493815153837204,
|
|
"signal/advantage_abs_mean": 0.09621814787387847,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09621814787387847,
|
|
"signal/advantage_pre_scale_std": 0.15054749250411986,
|
|
"signal/advantage_std": 0.15054749250411986,
|
|
"signal/brier_reward/centered_abs_mean": 0.1319481372833252,
|
|
"signal/brier_reward/group_std_mean": 0.17350344955921174,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013194814324378967,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013194814324378967,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.034258627146482465,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.052195188403129575,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034258626867085694,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034258626867085694,
|
|
"signal/format_reward/centered_abs_mean": 0.021001519076526164,
|
|
"signal/format_reward/group_std_mean": 0.03613746054470539,
|
|
"signal/format_reward/group_zero_std_frac": 0.8583333373069764,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010500759538263082,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010500759538263082,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001310308533720672,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00207103060092777,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.637885670788819e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.637885670788819e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.023271889239549638,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.033554903045296666,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023271888960152863,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023271888960152863,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33475651144981383,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4035548448562622,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03347565159201622,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03347565159201622,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18618817491166895,
|
|
"calibration/batch_distribution_entropy": 0.9298071065522094,
|
|
"calibration/buffer_distribution_entropy": 0.9178997148527944,
|
|
"calibration/confidence_entropy": 0.4370655181611058,
|
|
"calibration/coverage@0%": 0.12626696858865133,
|
|
"calibration/coverage@1%": 0.15455687624036638,
|
|
"calibration/coverage@10%": 0.3530733727917713,
|
|
"calibration/coverage@15%": 0.42093601050412605,
|
|
"calibration/coverage@20%": 0.5813107921886217,
|
|
"calibration/coverage@25%": 0.6828284484205971,
|
|
"calibration/coverage@30%": 0.7786590645203598,
|
|
"calibration/coverage@5%": 0.28417016779709464,
|
|
"calibration/ece": 0.14236897055875175,
|
|
"calibration/mean_confidence": 0.6090604705723687,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009114583333333348,
|
|
"completions/max_length": 3660.6,
|
|
"completions/max_terminated_length": 3660.6,
|
|
"completions/mean_length": 734.3580810546875,
|
|
"completions/mean_terminated_length": 741.1240966796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 203.4,
|
|
"epoch": 0.32399595005062437,
|
|
"grad_norm": 0.00042264023795723915,
|
|
"learning_rate": 2.1987951807228917e-06,
|
|
"loss": -0.01,
|
|
"num_tokens": 290437169.0,
|
|
"reward": 0.9915942430496216,
|
|
"reward_std": 0.11871129423379898,
|
|
"rewards/accgated_coverage_0": 0.02424356509000063,
|
|
"rewards/accgated_coverage_1": 0.02424356509000063,
|
|
"rewards/accgated_coverage_10": 0.02424356509000063,
|
|
"rewards/accgated_coverage_15": 0.02424356509000063,
|
|
"rewards/accgated_coverage_20": 0.02424356509000063,
|
|
"rewards/accgated_coverage_25": 0.02424356509000063,
|
|
"rewards/accgated_coverage_5": 0.02424356509000063,
|
|
"rewards/accuracy_reward": 0.66953125,
|
|
"rewards/brier_reward": 0.8216690421104431,
|
|
"rewards/confidence_uniqueness_reward": 0.935891056060791,
|
|
"rewards/format_reward": 0.9908854246139527,
|
|
"rewards/frontier_aurc_reward": -0.001132953108754009,
|
|
"rewards/frontier_ece_reward": 0.011347188241779804,
|
|
"rewards/frontier_entropy_batch_reward": -0.32461191415786744,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.08618276119232178,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.11529500484466552,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008618275728076696,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008618275728076696,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.08618276119232178,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.11529500484466552,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008618275728076696,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008618275728076696,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.08618276119232178,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.11529500484466552,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008618275728076696,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008618275728076696,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.08618276119232178,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.11529500484466552,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008618275728076696,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008618275728076696,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.08618276119232178,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.11529500484466552,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008618275728076696,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008618275728076696,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.08618276119232178,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.11529500484466552,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.008618275728076696,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.008618275728076696,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.08618276119232178,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.11529500484466552,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008618275728076696,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008618275728076696,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17056748867034913,
|
|
"signal/accuracy_reward/group_std_mean": 0.22679646909236909,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.35277777910232544,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08528374433517456,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08528374433517456,
|
|
"signal/advantage_abs_mean": 0.0854735866189003,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0854735866189003,
|
|
"signal/advantage_pre_scale_std": 0.13904949724674226,
|
|
"signal/advantage_std": 0.13904949724674226,
|
|
"signal/brier_reward/centered_abs_mean": 0.12689192742109298,
|
|
"signal/brier_reward/group_std_mean": 0.16868027150630951,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012689193338155746,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012689193338155746,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030863118171691895,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05242802649736404,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030863119289278986,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030863119289278986,
|
|
"signal/format_reward/centered_abs_mean": 0.01650933176279068,
|
|
"signal/format_reward/group_std_mean": 0.03543390221893787,
|
|
"signal/format_reward/group_zero_std_frac": 0.8388888835906982,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00825466588139534,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00825466588139534,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012018611654639245,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0018514725845307112,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.502326558693312e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.502326558693312e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.023784752935171127,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03346829637885094,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00237847538664937,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00237847538664937,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3354430079460144,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40641710758209226,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033544300496578215,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033544300496578215,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.10472164908366191,
|
|
"calibration/batch_distribution_entropy": 0.9028221156209602,
|
|
"calibration/buffer_distribution_entropy": 0.9232906072427312,
|
|
"calibration/confidence_entropy": 0.420103138724377,
|
|
"calibration/coverage@0%": 0.07498175326675494,
|
|
"calibration/coverage@1%": 0.14625834901143578,
|
|
"calibration/coverage@10%": 0.6354837792768773,
|
|
"calibration/coverage@15%": 0.7214461153664524,
|
|
"calibration/coverage@20%": 0.8011192019352944,
|
|
"calibration/coverage@25%": 0.8792893660204146,
|
|
"calibration/coverage@30%": 0.9623549569746952,
|
|
"calibration/coverage@5%": 0.4253273435119567,
|
|
"calibration/ece": 0.09601834271891607,
|
|
"calibration/mean_confidence": 0.6272844677653694,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013194444444444463,
|
|
"completions/max_length": 3399.4,
|
|
"completions/max_terminated_length": 3399.4,
|
|
"completions/mean_length": 726.5508911132813,
|
|
"completions/mean_terminated_length": 736.2290771484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 229.0,
|
|
"epoch": 0.33599580005249935,
|
|
"grad_norm": 0.0003059864102397114,
|
|
"learning_rate": 2.0481927710843377e-06,
|
|
"loss": -0.0134,
|
|
"num_tokens": 301911259.0,
|
|
"reward": 0.9859672069549561,
|
|
"reward_std": 0.12308523058891296,
|
|
"rewards/accgated_coverage_0": 0.02728597857058048,
|
|
"rewards/accgated_coverage_1": 0.02728597857058048,
|
|
"rewards/accgated_coverage_10": 0.02728597857058048,
|
|
"rewards/accgated_coverage_15": 0.02728597857058048,
|
|
"rewards/accgated_coverage_20": 0.02728597857058048,
|
|
"rewards/accgated_coverage_25": 0.025706437602639198,
|
|
"rewards/accgated_coverage_5": 0.02728597857058048,
|
|
"rewards/accuracy_reward": 0.6684027671813965,
|
|
"rewards/brier_reward": 0.8261925935745239,
|
|
"rewards/confidence_uniqueness_reward": 0.9272140622138977,
|
|
"rewards/format_reward": 0.98671875,
|
|
"rewards/frontier_aurc_reward": -0.0011251185205765069,
|
|
"rewards/frontier_ece_reward": 0.011587263457477093,
|
|
"rewards/frontier_entropy_batch_reward": -0.3702110886573792,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.07848915457725525,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.1055976927280426,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007848915364593267,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007848915364593267,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.07848915457725525,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.1055976927280426,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007848915364593267,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007848915364593267,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.07848915457725525,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.1055976927280426,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007848915364593267,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007848915364593267,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.07848915457725525,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.1055976927280426,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.007848915364593267,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.007848915364593267,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.07848915457725525,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.1055976927280426,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.007848915364593267,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.007848915364593267,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.06995586454868316,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.0945655107498169,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0069955865852534774,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0069955865852534774,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.07848915457725525,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.1055976927280426,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007848915364593267,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007848915364593267,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1587456613779068,
|
|
"signal/accuracy_reward/group_std_mean": 0.21405453681945802,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.37500000596046446,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0793728306889534,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0793728306889534,
|
|
"signal/advantage_abs_mean": 0.08728917539119721,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08728917539119721,
|
|
"signal/advantage_pre_scale_std": 0.14750308096408843,
|
|
"signal/advantage_std": 0.14750308096408843,
|
|
"signal/brier_reward/centered_abs_mean": 0.1274958610534668,
|
|
"signal/brier_reward/group_std_mean": 0.16804315745830536,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012749586440622806,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012749586440622806,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03858770914375782,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06330646127462387,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003858770988881588,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003858770988881588,
|
|
"signal/format_reward/centered_abs_mean": 0.022553168796002866,
|
|
"signal/format_reward/group_std_mean": 0.044466794654726985,
|
|
"signal/format_reward/group_zero_std_frac": 0.8083333492279052,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011276584398001433,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.011276584398001433,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013644436025060714,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002245521126314998,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7055545686162078e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7055545686162078e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.022616703435778616,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03189887069165707,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002261670376174152,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002261670376174152,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33584593534469603,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.407626211643219,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0335845947265625,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0335845947265625,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.14606667298627013,
|
|
"calibration/batch_distribution_entropy": 0.964540454743454,
|
|
"calibration/buffer_distribution_entropy": 0.9351459849109356,
|
|
"calibration/confidence_entropy": 0.45604566754714054,
|
|
"calibration/coverage@0%": 0.06742711008004583,
|
|
"calibration/coverage@1%": 0.08357294341337915,
|
|
"calibration/coverage@10%": 0.4891351777947418,
|
|
"calibration/coverage@15%": 0.60078833663897,
|
|
"calibration/coverage@20%": 0.6978462014212975,
|
|
"calibration/coverage@25%": 0.7915759806586891,
|
|
"calibration/coverage@30%": 0.8538409493047343,
|
|
"calibration/coverage@5%": 0.31591807622855894,
|
|
"calibration/ece": 0.153603334809968,
|
|
"calibration/mean_confidence": 0.5470196795355173,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009374999999999977,
|
|
"completions/max_length": 3416.8,
|
|
"completions/max_terminated_length": 3416.8,
|
|
"completions/mean_length": 714.3645141601562,
|
|
"completions/mean_terminated_length": 721.1787475585937,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 234.4,
|
|
"epoch": 0.34799565005437433,
|
|
"grad_norm": 0.0002952328941319138,
|
|
"learning_rate": 1.8975903614457832e-06,
|
|
"loss": -0.0099,
|
|
"num_tokens": 313205346.0,
|
|
"reward": 0.9970819234848023,
|
|
"reward_std": 0.11172881275415421,
|
|
"rewards/accgated_coverage_0": 0.015393723733723164,
|
|
"rewards/accgated_coverage_1": 0.015393723733723164,
|
|
"rewards/accgated_coverage_10": 0.015393723733723164,
|
|
"rewards/accgated_coverage_15": 0.015393723733723164,
|
|
"rewards/accgated_coverage_20": 0.01588248461484909,
|
|
"rewards/accgated_coverage_25": 0.020709260366857052,
|
|
"rewards/accgated_coverage_5": 0.015393723733723164,
|
|
"rewards/accuracy_reward": 0.6959201455116272,
|
|
"rewards/brier_reward": 0.8239673137664795,
|
|
"rewards/confidence_uniqueness_reward": 0.9359864592552185,
|
|
"rewards/format_reward": 0.9906249880790711,
|
|
"rewards/frontier_aurc_reward": -0.0010717614088207484,
|
|
"rewards/frontier_ece_reward": 0.0071692907251417635,
|
|
"rewards/frontier_entropy_batch_reward": -0.34245588183403014,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.08704878985881806,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.11625861674547196,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008704879134893418,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008704879134893418,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.08704878985881806,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.11625861674547196,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008704879134893418,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008704879134893418,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.08704878985881806,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.11625861674547196,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008704879134893418,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008704879134893418,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.08704878985881806,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.11625861674547196,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.008704879134893418,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.008704879134893418,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.0803851142525673,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.10777752846479416,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.008038511220365762,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.008038511220365762,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.04402193687856197,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.05933395996689796,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004402193846181035,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004402193846181035,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.08704878985881806,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.11625861674547196,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008704879134893418,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008704879134893418,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14832357168197632,
|
|
"signal/accuracy_reward/group_std_mean": 0.20283843576908112,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07416178584098816,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07416178584098816,
|
|
"signal/advantage_abs_mean": 0.0811191275715828,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0811191275715828,
|
|
"signal/advantage_pre_scale_std": 0.13390319645404816,
|
|
"signal/advantage_std": 0.13390319645404816,
|
|
"signal/brier_reward/centered_abs_mean": 0.12230742424726486,
|
|
"signal/brier_reward/group_std_mean": 0.16233535408973693,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012230742909014224,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012230742909014224,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03038500025868416,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04836958795785904,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00303850001655519,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00303850001655519,
|
|
"signal/format_reward/centered_abs_mean": 0.016276041604578496,
|
|
"signal/format_reward/group_std_mean": 0.03133721351623535,
|
|
"signal/format_reward/group_zero_std_frac": 0.8694444417953491,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008138020802289248,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008138020802289248,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001202726038172841,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002039621490985155,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5034074567665812e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5034074567665812e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.018114964291453362,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.025317597761750223,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018114965176209807,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018114965176209807,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34314724802970886,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4112194418907166,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034314725548028946,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034314725548028946,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1502133761388545,
|
|
"calibration/batch_distribution_entropy": 0.9393864979003841,
|
|
"calibration/buffer_distribution_entropy": 0.9458424403735097,
|
|
"calibration/confidence_entropy": 0.44937923792070256,
|
|
"calibration/coverage@0%": 0.09901511084757157,
|
|
"calibration/coverage@1%": 0.16974560199691088,
|
|
"calibration/coverage@10%": 0.5041888110998058,
|
|
"calibration/coverage@15%": 0.5857952791984206,
|
|
"calibration/coverage@20%": 0.6541716232657138,
|
|
"calibration/coverage@25%": 0.7284662615672963,
|
|
"calibration/coverage@30%": 0.81490193267646,
|
|
"calibration/coverage@5%": 0.3860676982142951,
|
|
"calibration/ece": 0.1404094438977152,
|
|
"calibration/mean_confidence": 0.5738136615962789,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010069444444444442,
|
|
"completions/max_length": 3607.2,
|
|
"completions/max_terminated_length": 3607.2,
|
|
"completions/mean_length": 781.7636474609375,
|
|
"completions/mean_terminated_length": 789.7067504882813,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 237.8,
|
|
"epoch": 0.3599955000562493,
|
|
"grad_norm": 0.00030073069501668215,
|
|
"learning_rate": 1.7469879518072292e-06,
|
|
"loss": -0.0096,
|
|
"num_tokens": 325321599.0,
|
|
"reward": 0.9943321108818054,
|
|
"reward_std": 0.12259746938943863,
|
|
"rewards/accgated_coverage_0": 0.021769443340599538,
|
|
"rewards/accgated_coverage_1": 0.021769443340599538,
|
|
"rewards/accgated_coverage_10": 0.021769443340599538,
|
|
"rewards/accgated_coverage_15": 0.021933466009795666,
|
|
"rewards/accgated_coverage_20": 0.021304438635706902,
|
|
"rewards/accgated_coverage_25": 0.04014414809644222,
|
|
"rewards/accgated_coverage_5": 0.021769443340599538,
|
|
"rewards/accuracy_reward": 0.6783854126930237,
|
|
"rewards/brier_reward": 0.8334152817726135,
|
|
"rewards/confidence_uniqueness_reward": 0.9337161183357239,
|
|
"rewards/format_reward": 0.9897569417953491,
|
|
"rewards/frontier_aurc_reward": -0.0011517940787598492,
|
|
"rewards/frontier_ece_reward": 0.00666016167961061,
|
|
"rewards/frontier_entropy_batch_reward": -0.3414980471134186,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.08002641052007675,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.10786933153867721,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008002641331404447,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008002641331404447,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.08002641052007675,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.10786933153867721,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008002641331404447,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008002641331404447,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.08002641052007675,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.10786933153867721,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008002641331404447,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008002641331404447,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.07641349881887435,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.10338671654462814,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.007641350477933883,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.007641350477933883,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04975445568561554,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.06870571970939636,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004975445568561554,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004975445568561554,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.03025592640042305,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.03869783394038677,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.003025592723861337,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.003025592723861337,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.08002641052007675,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.10786933153867721,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008002641331404447,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008002641331404447,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17113172709941865,
|
|
"signal/accuracy_reward/group_std_mean": 0.2234587401151657,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3666666686534882,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08556586354970933,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08556586354970933,
|
|
"signal/advantage_abs_mean": 0.0890174686908722,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0890174686908722,
|
|
"signal/advantage_pre_scale_std": 0.14541475772857665,
|
|
"signal/advantage_std": 0.14541475772857665,
|
|
"signal/brier_reward/centered_abs_mean": 0.12310948371887206,
|
|
"signal/brier_reward/group_std_mean": 0.1644404262304306,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012310948595404625,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012310948595404625,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03251851163804531,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.053677086532115934,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032518512103706597,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032518512103706597,
|
|
"signal/format_reward/centered_abs_mean": 0.01814236119389534,
|
|
"signal/format_reward/group_std_mean": 0.03656843528151512,
|
|
"signal/format_reward/group_zero_std_frac": 0.8416666626930237,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00907118059694767,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00907118059694767,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015089602209627628,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0025732704903930425,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8862002616515382e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8862002616515382e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014805944077670575,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.019813685864210128,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014805944636464119,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014805944636464119,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32585235834121706,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3952793776988983,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03258523568511009,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03258523568511009,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.3599955000562493,
|
|
"eval_calibration/aurc": 0.16763761841652935,
|
|
"eval_calibration/batch_distribution_entropy": 0.8990557588565634,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9513564885877245,
|
|
"eval_calibration/confidence_entropy": 0.4349930828583795,
|
|
"eval_calibration/coverage@0%": 0.22465277777777778,
|
|
"eval_calibration/coverage@1%": 0.22465277777777778,
|
|
"eval_calibration/coverage@10%": 0.34965277777777776,
|
|
"eval_calibration/coverage@15%": 0.5496527777777778,
|
|
"eval_calibration/coverage@20%": 0.7135416666666666,
|
|
"eval_calibration/coverage@25%": 0.8722222222222222,
|
|
"eval_calibration/coverage@30%": 0.9406249999999999,
|
|
"eval_calibration/coverage@5%": 0.33402777777777776,
|
|
"eval_calibration/ece": 0.22026682861937405,
|
|
"eval_calibration/mean_confidence": 0.5920676732220265,
|
|
"eval_completions/clipped_ratio": 0.006944444444444438,
|
|
"eval_completions/max_length": 2337.6666666666665,
|
|
"eval_completions/max_terminated_length": 2337.6666666666665,
|
|
"eval_completions/mean_length": 748.0566202799479,
|
|
"eval_completions/mean_terminated_length": 753.2368876139323,
|
|
"eval_completions/min_length": 101.66666666666667,
|
|
"eval_completions/min_terminated_length": 279.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 325321599.0,
|
|
"eval_reward": 0.916528731584549,
|
|
"eval_reward_std": 0.2074477275212606,
|
|
"eval_rewards/accgated_coverage_0": 0.017190332369258005,
|
|
"eval_rewards/accgated_coverage_1": 0.017190332369258005,
|
|
"eval_rewards/accgated_coverage_10": 0.017190332369258005,
|
|
"eval_rewards/accgated_coverage_15": 0.01790264039300382,
|
|
"eval_rewards/accgated_coverage_20": 0.01782215495283405,
|
|
"eval_rewards/accgated_coverage_25": 0.04417319502681494,
|
|
"eval_rewards/accgated_coverage_5": 0.017190332369258005,
|
|
"eval_rewards/accuracy_reward": 0.6701388855775198,
|
|
"eval_rewards/brier_reward": 0.8126361072063446,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8869705498218536,
|
|
"eval_rewards/format_reward": 0.9904513855775198,
|
|
"eval_rewards/frontier_aurc_reward": -0.0019800245742468783,
|
|
"eval_rewards/frontier_ece_reward": 0.004768568052289386,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9904513855775198,
|
|
"eval_runtime": 200.2087,
|
|
"eval_samples_per_second": 4.995,
|
|
"eval_signal/accgated_coverage_0/centered_abs_mean": 0.1304701641201973,
|
|
"eval_signal/accgated_coverage_0/group_std_mean": 0.18650069584449133,
|
|
"eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.013047016536196073,
|
|
"eval_signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.013047016536196073,
|
|
"eval_signal/accgated_coverage_1/centered_abs_mean": 0.1304701641201973,
|
|
"eval_signal/accgated_coverage_1/group_std_mean": 0.18650069584449133,
|
|
"eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.013047016536196073,
|
|
"eval_signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.013047016536196073,
|
|
"eval_signal/accgated_coverage_10/centered_abs_mean": 0.1304701641201973,
|
|
"eval_signal/accgated_coverage_10/group_std_mean": 0.18650069584449133,
|
|
"eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.013047016536196073,
|
|
"eval_signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.013047016536196073,
|
|
"eval_signal/accgated_coverage_15/centered_abs_mean": 0.10964768255750339,
|
|
"eval_signal/accgated_coverage_15/group_std_mean": 0.15953792383273444,
|
|
"eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.010964768783499798,
|
|
"eval_signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.010964768783499798,
|
|
"eval_signal/accgated_coverage_20/centered_abs_mean": 0.057131893932819366,
|
|
"eval_signal/accgated_coverage_20/group_std_mean": 0.08694148808717728,
|
|
"eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0057131896416346235,
|
|
"eval_signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0057131896416346235,
|
|
"eval_signal/accgated_coverage_25/centered_abs_mean": 0.05311373248696327,
|
|
"eval_signal/accgated_coverage_25/group_std_mean": 0.06559204993148644,
|
|
"eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005311373310784499,
|
|
"eval_signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005311373310784499,
|
|
"eval_signal/accgated_coverage_5/centered_abs_mean": 0.1304701641201973,
|
|
"eval_signal/accgated_coverage_5/group_std_mean": 0.18650069584449133,
|
|
"eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.013047016536196073,
|
|
"eval_signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.013047016536196073,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4261067758003871,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4669964363177617,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21305338790019354,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21305338790019354,
|
|
"eval_signal/advantage_abs_mean": 0.17535198479890823,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.17535198479890823,
|
|
"eval_signal/advantage_pre_scale_std": 0.20822140822807947,
|
|
"eval_signal/advantage_std": 0.20822140822807947,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.1969471424818039,
|
|
"eval_signal/brier_reward/group_std_mean": 0.25743385901053745,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019694714496533077,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019694714496533077,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05258619785308838,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07910802401602268,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005258619707698624,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005258619707698624,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.018174913246184587,
|
|
"eval_signal/format_reward/group_std_mean": 0.045046874321997166,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.7777778009573618,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009087456623092294,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.009087456623092294,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0033884466198893883,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008018870799181363,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2355582081654575e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2355582081654575e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.017925683719416458,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.02367624578376611,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017925684223882854,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017925684223882854,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.018174913246184587,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.045046874321997166,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7777778009573618,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0018174914099896948,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0018174914099896948,
|
|
"eval_steps_per_second": 0.03,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.13959037762186857,
|
|
"calibration/batch_distribution_entropy": 0.9571234233060555,
|
|
"calibration/buffer_distribution_entropy": 0.9544348820115424,
|
|
"calibration/confidence_entropy": 0.47877571143833497,
|
|
"calibration/coverage@0%": 0.04338049940871343,
|
|
"calibration/coverage@1%": 0.04338049940871343,
|
|
"calibration/coverage@10%": 0.5388304666397344,
|
|
"calibration/coverage@15%": 0.660080229138832,
|
|
"calibration/coverage@20%": 0.7443303760611322,
|
|
"calibration/coverage@25%": 0.8514756742717319,
|
|
"calibration/coverage@30%": 0.9032110781512721,
|
|
"calibration/coverage@5%": 0.23531781607810212,
|
|
"calibration/ece": 0.147932460641434,
|
|
"calibration/mean_confidence": 0.5904228748409285,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006944444444444442,
|
|
"completions/max_length": 3571.6,
|
|
"completions/max_terminated_length": 3571.6,
|
|
"completions/mean_length": 734.038525390625,
|
|
"completions/mean_terminated_length": 739.1866455078125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 247.6,
|
|
"epoch": 0.3719953500581243,
|
|
"grad_norm": 0.00034539776970632374,
|
|
"learning_rate": 1.5963855421686747e-06,
|
|
"loss": -0.0059,
|
|
"num_tokens": 336885435.0,
|
|
"reward": 1.0162927865982057,
|
|
"reward_std": 0.117668616771698,
|
|
"rewards/accgated_coverage_0": 0.008730353973805904,
|
|
"rewards/accgated_coverage_1": 0.008730353973805904,
|
|
"rewards/accgated_coverage_10": 0.008730353973805904,
|
|
"rewards/accgated_coverage_15": 0.012687907461076975,
|
|
"rewards/accgated_coverage_20": 0.023076852411031724,
|
|
"rewards/accgated_coverage_25": 0.06760275661945343,
|
|
"rewards/accgated_coverage_5": 0.008730353973805904,
|
|
"rewards/accuracy_reward": 0.72109375,
|
|
"rewards/brier_reward": 0.8338244438171387,
|
|
"rewards/confidence_uniqueness_reward": 0.9384856462478638,
|
|
"rewards/format_reward": 0.9928819537162781,
|
|
"rewards/frontier_aurc_reward": -0.0010542726842686534,
|
|
"rewards/frontier_ece_reward": 0.0029879425885155795,
|
|
"rewards/frontier_entropy_batch_reward": -0.3204062461853027,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.09136468917131424,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.12313442379236221,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.009136468544602394,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.009136468544602394,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.09136468917131424,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.12313442379236221,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.009136468544602394,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.009136468544602394,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.09136468917131424,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.12313442379236221,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.009136468544602394,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.009136468544602394,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.06817464828491211,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.09295286387205123,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.006817464809864759,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.006817464809864759,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.03550227433443069,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.047702183574438096,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.003550227452069521,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.003550227452069521,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.03964427635073662,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.05114280804991722,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.003964427672326565,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.003964427672326565,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.09136468917131424,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.12313442379236221,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.009136468544602394,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.009136468544602394,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17602538764476777,
|
|
"signal/accuracy_reward/group_std_mean": 0.22985662817955016,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.35833333134651185,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08801269382238389,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08801269382238389,
|
|
"signal/advantage_abs_mean": 0.08501027822494507,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08501027822494507,
|
|
"signal/advantage_pre_scale_std": 0.13985675275325776,
|
|
"signal/advantage_std": 0.13985675275325776,
|
|
"signal/brier_reward/centered_abs_mean": 0.11954725980758667,
|
|
"signal/brier_reward/group_std_mean": 0.15919876992702484,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01195472627878189,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01195472627878189,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0272883802652359,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04573171883821488,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027288380078971386,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027288380078971386,
|
|
"signal/format_reward/centered_abs_mean": 0.01296657994389534,
|
|
"signal/format_reward/group_std_mean": 0.02831004709005356,
|
|
"signal/format_reward/group_zero_std_frac": 0.8694444417953491,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00648328997194767,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00648328997194767,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015273221535608173,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00275028171017766,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9091527065029368e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9091527065029368e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.012682820670306683,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.016651974245905875,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012682820903137325,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012682820903137325,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3162439942359924,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38905380964279174,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03162440098822117,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03162440098822117,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.10743571870653561,
|
|
"calibration/batch_distribution_entropy": 0.9115237818660422,
|
|
"calibration/buffer_distribution_entropy": 0.9605668145637967,
|
|
"calibration/confidence_entropy": 0.46404297224501123,
|
|
"calibration/coverage@0%": 0.12863544602502677,
|
|
"calibration/coverage@1%": 0.278052421405394,
|
|
"calibration/coverage@10%": 0.6681111929140036,
|
|
"calibration/coverage@15%": 0.7555172651477655,
|
|
"calibration/coverage@20%": 0.8412781445165628,
|
|
"calibration/coverage@25%": 0.8842789451144542,
|
|
"calibration/coverage@30%": 0.9047619047619048,
|
|
"calibration/coverage@5%": 0.5154270129062687,
|
|
"calibration/ece": 0.14810043813093507,
|
|
"calibration/mean_confidence": 0.6534314587185783,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010329861111111116,
|
|
"completions/max_length": 3365.0,
|
|
"completions/max_terminated_length": 3365.0,
|
|
"completions/mean_length": 738.4152099609375,
|
|
"completions/mean_terminated_length": 746.1383666992188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 224.0,
|
|
"epoch": 0.38399520005999926,
|
|
"grad_norm": 0.00032688508508726954,
|
|
"learning_rate": 1.4457831325301204e-06,
|
|
"loss": -0.0093,
|
|
"num_tokens": 348479274.0,
|
|
"reward": 0.9841222167015076,
|
|
"reward_std": 0.12128061801195145,
|
|
"rewards/accgated_coverage_0": 0.015495671518146991,
|
|
"rewards/accgated_coverage_1": 0.015495671518146991,
|
|
"rewards/accgated_coverage_10": 0.015495671518146991,
|
|
"rewards/accgated_coverage_15": 0.015093481354415416,
|
|
"rewards/accgated_coverage_20": 0.024507426470518113,
|
|
"rewards/accgated_coverage_25": 0.06457818299531937,
|
|
"rewards/accgated_coverage_5": 0.015495671518146991,
|
|
"rewards/accuracy_reward": 0.6630208373069764,
|
|
"rewards/brier_reward": 0.8221668839454651,
|
|
"rewards/confidence_uniqueness_reward": 0.9338045358657837,
|
|
"rewards/format_reward": 0.9896701455116272,
|
|
"rewards/frontier_aurc_reward": -0.0016030759084969758,
|
|
"rewards/frontier_ece_reward": 0.0035916581749916078,
|
|
"rewards/frontier_entropy_batch_reward": -0.3477570950984955,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.0703942283987999,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.09567773193120957,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.0070394231006503105,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.0070394231006503105,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.0703942283987999,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.09567773193120957,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.0070394231006503105,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.0070394231006503105,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.0703942283987999,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.09567773193120957,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0070394231006503105,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0070394231006503105,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.0434797465801239,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.06042725443840027,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0043479747138917444,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0043479747138917444,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.025829650834202765,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.03384415283799171,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0025829650927335023,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0025829650927335023,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.03724170736968517,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.04777343571186066,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0037241708021610977,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0037241708021610977,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.0703942283987999,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.09567773193120957,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.0070394231006503105,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.0070394231006503105,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16209852695465088,
|
|
"signal/accuracy_reward/group_std_mean": 0.21041282415390014,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4138888895511627,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08104926347732544,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08104926347732544,
|
|
"signal/advantage_abs_mean": 0.09037692099809647,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09037692099809647,
|
|
"signal/advantage_pre_scale_std": 0.14657594561576842,
|
|
"signal/advantage_std": 0.14657594561576842,
|
|
"signal/brier_reward/centered_abs_mean": 0.11801843196153641,
|
|
"signal/brier_reward/group_std_mean": 0.1553761065006256,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011801843531429768,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011801843531429768,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03203802034258842,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.051499532908201216,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003203802043572068,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003203802043572068,
|
|
"signal/format_reward/centered_abs_mean": 0.01785481758415699,
|
|
"signal/format_reward/group_std_mean": 0.03447670228779316,
|
|
"signal/format_reward/group_zero_std_frac": 0.8555555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008927408792078494,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008927408792078494,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017638769699260593,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0029752728529274463,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2048461687518284e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2048461687518284e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.011287710629403591,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01489656399935484,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011287711327895521,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011287711327895521,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3267548501491547,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39591810703277586,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03267548531293869,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03267548531293869,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.13707453862890023,
|
|
"calibration/batch_distribution_entropy": 0.9393998078072607,
|
|
"calibration/buffer_distribution_entropy": 0.963943720408478,
|
|
"calibration/confidence_entropy": 0.4556464069393563,
|
|
"calibration/coverage@0%": 0.11347585474513056,
|
|
"calibration/coverage@1%": 0.17646798072938252,
|
|
"calibration/coverage@10%": 0.5324355832987981,
|
|
"calibration/coverage@15%": 0.6414469246442879,
|
|
"calibration/coverage@20%": 0.7144313147534189,
|
|
"calibration/coverage@25%": 0.7613995614035087,
|
|
"calibration/coverage@30%": 0.8631929824561404,
|
|
"calibration/coverage@5%": 0.36714439494405304,
|
|
"calibration/ece": 0.12891001371722027,
|
|
"calibration/mean_confidence": 0.5747154789833697,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010416666666666675,
|
|
"completions/max_length": 3487.2,
|
|
"completions/max_terminated_length": 3487.2,
|
|
"completions/mean_length": 753.5942016601563,
|
|
"completions/mean_terminated_length": 761.6573120117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 244.2,
|
|
"epoch": 0.39599505006187424,
|
|
"grad_norm": 0.00033790257293730974,
|
|
"learning_rate": 1.2951807228915664e-06,
|
|
"loss": -0.0077,
|
|
"num_tokens": 360299751.0,
|
|
"reward": 0.9920899629592895,
|
|
"reward_std": 0.12097463011741638,
|
|
"rewards/accgated_coverage_0": 0.025350801181048155,
|
|
"rewards/accgated_coverage_1": 0.025350801181048155,
|
|
"rewards/accgated_coverage_10": 0.025350801181048155,
|
|
"rewards/accgated_coverage_15": 0.023273496888577938,
|
|
"rewards/accgated_coverage_20": 0.03261325098574162,
|
|
"rewards/accgated_coverage_25": 0.07034600675106048,
|
|
"rewards/accgated_coverage_5": 0.025350801181048155,
|
|
"rewards/accuracy_reward": 0.6567708373069763,
|
|
"rewards/brier_reward": 0.8295871496200562,
|
|
"rewards/confidence_uniqueness_reward": 0.936906611919403,
|
|
"rewards/format_reward": 0.9895833253860473,
|
|
"rewards/frontier_aurc_reward": -0.001301741786301136,
|
|
"rewards/frontier_ece_reward": 0.003387619974091649,
|
|
"rewards/frontier_entropy_batch_reward": -0.30822600722312926,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.07112332507967949,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.09761943519115449,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00711233289912343,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00711233289912343,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.07112332507967949,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.09761943519115449,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00711233289912343,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00711233289912343,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.07112332507967949,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.09761943519115449,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.00711233289912343,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.00711233289912343,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.03819368332624436,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.0530736930668354,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0038193685468286276,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0038193685468286276,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.025878940895199775,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.03326713815331459,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.002587894257158041,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.002587894257158041,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.03837202824652195,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.05023747906088829,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.0038372030016034842,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.0038372030016034842,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.07112332507967949,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.09761943519115449,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00711233289912343,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00711233289912343,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15069444477558136,
|
|
"signal/accuracy_reward/group_std_mean": 0.20632360279560089,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.37777777910232546,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07534722238779068,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07534722238779068,
|
|
"signal/advantage_abs_mean": 0.08762808591127395,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08762808591127395,
|
|
"signal/advantage_pre_scale_std": 0.14219435751438142,
|
|
"signal/advantage_std": 0.14219435751438142,
|
|
"signal/brier_reward/centered_abs_mean": 0.11296486258506774,
|
|
"signal/brier_reward/group_std_mean": 0.1534811317920685,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011296486295759679,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011296486295759679,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03022078201174736,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.050181590020656586,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003022078238427639,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003022078238427639,
|
|
"signal/format_reward/centered_abs_mean": 0.017621527798473834,
|
|
"signal/format_reward/group_std_mean": 0.03512752801179886,
|
|
"signal/format_reward/group_zero_std_frac": 0.8472222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008810763899236917,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008810763899236917,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014444491360336542,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002510636835359037,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8055615328194108e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8055615328194108e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010616243071854114,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.014283826760947704,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010616243351250886,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010616243351250886,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32418252229690553,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39595122933387755,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0324182540178299,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0324182540178299,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.09379690694605691,
|
|
"calibration/batch_distribution_entropy": 0.8875218762767598,
|
|
"calibration/buffer_distribution_entropy": 0.9656728923508411,
|
|
"calibration/confidence_entropy": 0.44091941057216244,
|
|
"calibration/coverage@0%": 0.06576609847566676,
|
|
"calibration/coverage@1%": 0.1662869318090001,
|
|
"calibration/coverage@10%": 0.7042785981870985,
|
|
"calibration/coverage@15%": 0.797162939172109,
|
|
"calibration/coverage@20%": 0.8705423172727755,
|
|
"calibration/coverage@25%": 0.9271389712655221,
|
|
"calibration/coverage@30%": 0.96880830522678,
|
|
"calibration/coverage@5%": 0.33866477844879533,
|
|
"calibration/ece": 0.08406840250541019,
|
|
"calibration/mean_confidence": 0.6761357102230366,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.007986111111111093,
|
|
"completions/max_length": 3577.8,
|
|
"completions/max_terminated_length": 3577.8,
|
|
"completions/mean_length": 725.5506103515625,
|
|
"completions/mean_terminated_length": 731.4747314453125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 232.2,
|
|
"epoch": 0.4079949000637492,
|
|
"grad_norm": 0.0004053888551425189,
|
|
"learning_rate": 1.1445783132530121e-06,
|
|
"loss": -0.0075,
|
|
"num_tokens": 371747278.0,
|
|
"reward": 1.0126575469970702,
|
|
"reward_std": 0.12015107423067092,
|
|
"rewards/accgated_coverage_0": 0.017383670061826707,
|
|
"rewards/accgated_coverage_1": 0.017383670061826707,
|
|
"rewards/accgated_coverage_10": 0.017383670061826707,
|
|
"rewards/accgated_coverage_15": 0.02135354969650507,
|
|
"rewards/accgated_coverage_20": 0.04693642929196358,
|
|
"rewards/accgated_coverage_25": 0.10021267533302307,
|
|
"rewards/accgated_coverage_5": 0.017383670061826707,
|
|
"rewards/accuracy_reward": 0.70546875,
|
|
"rewards/brier_reward": 0.8442538380622864,
|
|
"rewards/confidence_uniqueness_reward": 0.9332508206367492,
|
|
"rewards/format_reward": 0.9918402791023254,
|
|
"rewards/frontier_aurc_reward": -0.0012258694274351,
|
|
"rewards/frontier_ece_reward": 0.002612181368749589,
|
|
"rewards/frontier_entropy_batch_reward": -0.37797077298164367,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.07582537084817886,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.10270393788814544,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007582537457346916,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007582537457346916,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.07582537084817886,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.10270393788814544,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007582537457346916,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007582537457346916,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.07582537084817886,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.10270393788814544,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007582537457346916,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007582537457346916,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.03511426188051701,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.047978077083826065,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0035114262253046038,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0035114262253046038,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.030998488515615465,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.0399199478328228,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0030998490750789643,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0030998490750789643,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.05178140699863434,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.06730167269706726,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005178140755742788,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005178140755742788,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.07582537084817886,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.10270393788814544,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007582537457346916,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007582537457346916,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.155908203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.20903717577457429,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.397222226858139,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0779541015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0779541015625,
|
|
"signal/advantage_abs_mean": 0.08711908012628555,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08711908012628555,
|
|
"signal/advantage_pre_scale_std": 0.14394052624702453,
|
|
"signal/advantage_std": 0.14394052624702453,
|
|
"signal/brier_reward/centered_abs_mean": 0.11152398288249969,
|
|
"signal/brier_reward/group_std_mean": 0.14993155002593994,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011152398772537708,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011152398772537708,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029369105771183967,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04833545163273811,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029369106981903315,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029369106981903315,
|
|
"signal/format_reward/centered_abs_mean": 0.01460503451526165,
|
|
"signal/format_reward/group_std_mean": 0.030626020580530166,
|
|
"signal/format_reward/group_zero_std_frac": 0.8611111283302307,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007302517257630825,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007302517257630825,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016249929554760455,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0030195300932973623,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0312412016210146e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0312412016210146e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010075355507433414,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.013354136049747467,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010075355181470512,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010075355181470512,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32258252501487733,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3932394325733185,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03225825130939484,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03225825130939484,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.11142001321196293,
|
|
"calibration/batch_distribution_entropy": 0.9423204741141762,
|
|
"calibration/buffer_distribution_entropy": 0.9661498449320355,
|
|
"calibration/confidence_entropy": 0.45821246300454394,
|
|
"calibration/coverage@0%": 0.1898169706104169,
|
|
"calibration/coverage@1%": 0.2035106542946567,
|
|
"calibration/coverage@10%": 0.533609767462017,
|
|
"calibration/coverage@15%": 0.6553213812147145,
|
|
"calibration/coverage@20%": 0.803748541020554,
|
|
"calibration/coverage@25%": 0.9049741637390127,
|
|
"calibration/coverage@30%": 0.9748223193534041,
|
|
"calibration/coverage@5%": 0.33688175700535544,
|
|
"calibration/ece": 0.12908904507599772,
|
|
"calibration/mean_confidence": 0.5928169319155115,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012239583333333326,
|
|
"completions/max_length": 3628.0,
|
|
"completions/max_terminated_length": 3628.0,
|
|
"completions/mean_length": 751.97587890625,
|
|
"completions/mean_terminated_length": 761.2690673828125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 224.6,
|
|
"epoch": 0.4199947500656242,
|
|
"grad_norm": 0.000337122764904052,
|
|
"learning_rate": 9.93975903614458e-07,
|
|
"loss": -0.0114,
|
|
"num_tokens": 383518008.0,
|
|
"reward": 1.006504440307617,
|
|
"reward_std": 0.12649587243795396,
|
|
"rewards/accgated_coverage_0": 0.022072068974375726,
|
|
"rewards/accgated_coverage_1": 0.022072068974375726,
|
|
"rewards/accgated_coverage_10": 0.02190903965383768,
|
|
"rewards/accgated_coverage_15": 0.023819806054234505,
|
|
"rewards/accgated_coverage_20": 0.04952913150191307,
|
|
"rewards/accgated_coverage_25": 0.09890762567520142,
|
|
"rewards/accgated_coverage_5": 0.022072068974375726,
|
|
"rewards/accuracy_reward": 0.6897569417953491,
|
|
"rewards/brier_reward": 0.8337709426879882,
|
|
"rewards/confidence_uniqueness_reward": 0.9315677404403686,
|
|
"rewards/format_reward": 0.9875868082046508,
|
|
"rewards/frontier_aurc_reward": -0.0010118687408976258,
|
|
"rewards/frontier_ece_reward": 0.00214645602973178,
|
|
"rewards/frontier_entropy_batch_reward": -0.34941497445106506,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.08707907050848007,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.11673283874988556,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.00870790733024478,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.00870790733024478,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.08707907050848007,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.11673283874988556,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.00870790733024478,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.00870790733024478,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.0848253458738327,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.11379681825637818,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.008482534252107144,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.008482534252107144,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.03722478076815605,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.04985408037900925,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.003722478076815605,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.003722478076815605,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.03205550014972687,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.04064697846770286,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0032055501360446215,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0032055501360446215,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.04977491497993469,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.06415863260626793,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.004977491591125727,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.004977491591125727,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.08707907050848007,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.11673283874988556,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.00870790733024478,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.00870790733024478,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17249348759651184,
|
|
"signal/accuracy_reward/group_std_mean": 0.2279975652694702,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.35555556416511536,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08624674379825592,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08624674379825592,
|
|
"signal/advantage_abs_mean": 0.09142151474952698,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09142151474952698,
|
|
"signal/advantage_pre_scale_std": 0.15212540030479432,
|
|
"signal/advantage_std": 0.15212540030479432,
|
|
"signal/brier_reward/centered_abs_mean": 0.12030397355556488,
|
|
"signal/brier_reward/group_std_mean": 0.15930656492710113,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012030397728085517,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012030397728085517,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.035230952501297,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05694400668144226,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035230953246355055,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035230953246355055,
|
|
"signal/format_reward/centered_abs_mean": 0.02116427943110466,
|
|
"signal/format_reward/group_std_mean": 0.04019532725214958,
|
|
"signal/format_reward/group_zero_std_frac": 0.8305555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01058213971555233,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01058213971555233,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012590843951329588,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022685666335746646,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.5738555521238596e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.5738555521238596e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01081162467598915,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.014261576719582081,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001081162504851818,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001081162504851818,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32662315368652345,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3984943747520447,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03266231343150139,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03266231343150139,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.08382240776518861,
|
|
"calibration/batch_distribution_entropy": 0.9263015077586324,
|
|
"calibration/buffer_distribution_entropy": 0.9654356750160844,
|
|
"calibration/confidence_entropy": 0.44053076052748086,
|
|
"calibration/coverage@0%": 0.11934866521125106,
|
|
"calibration/coverage@1%": 0.17077675660910055,
|
|
"calibration/coverage@10%": 0.6990107712808307,
|
|
"calibration/coverage@15%": 0.807274710180921,
|
|
"calibration/coverage@20%": 0.8950033786567826,
|
|
"calibration/coverage@25%": 0.9454006875453829,
|
|
"calibration/coverage@30%": 0.9649214659685864,
|
|
"calibration/coverage@5%": 0.5336004888954191,
|
|
"calibration/ece": 0.11662381439413354,
|
|
"calibration/mean_confidence": 0.6396629208521569,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011458333333333348,
|
|
"completions/max_length": 3815.8,
|
|
"completions/max_terminated_length": 3815.8,
|
|
"completions/mean_length": 724.7723876953125,
|
|
"completions/mean_terminated_length": 733.14853515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 226.4,
|
|
"epoch": 0.4319946000674992,
|
|
"grad_norm": 0.00037727519520558417,
|
|
"learning_rate": 8.433734939759036e-07,
|
|
"loss": -0.0111,
|
|
"num_tokens": 394967354.0,
|
|
"reward": 1.0016642928123474,
|
|
"reward_std": 0.12620580643415452,
|
|
"rewards/accgated_coverage_0": 0.021729044057428835,
|
|
"rewards/accgated_coverage_1": 0.021729044057428835,
|
|
"rewards/accgated_coverage_10": 0.02150404118001461,
|
|
"rewards/accgated_coverage_15": 0.02417885847389698,
|
|
"rewards/accgated_coverage_20": 0.052081949263811114,
|
|
"rewards/accgated_coverage_25": 0.10146590769290924,
|
|
"rewards/accgated_coverage_5": 0.021729044057428835,
|
|
"rewards/accuracy_reward": 0.6892361044883728,
|
|
"rewards/brier_reward": 0.829409658908844,
|
|
"rewards/confidence_uniqueness_reward": 0.9283636331558227,
|
|
"rewards/format_reward": 0.9884548664093018,
|
|
"rewards/frontier_aurc_reward": -0.0018609853694215416,
|
|
"rewards/frontier_ece_reward": 0.0019440729709458537,
|
|
"rewards/frontier_entropy_batch_reward": -0.39571504592895507,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.0820931151509285,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.11036419868469238,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008209311775863171,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008209311775863171,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.0820931151509285,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.11036419868469238,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008209311775863171,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008209311775863171,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.07484557554125786,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.1010680690407753,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.0074845578521490095,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.0074845578521490095,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.033568178117275235,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.044669998437166215,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.003356817737221718,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.003356817737221718,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.034712836146354675,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.044363278150558474,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0034712836146354674,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0034712836146354674,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.05585875362157822,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.07269425168633462,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005585875362157822,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005585875362157822,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.0820931151509285,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.11036419868469238,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008209311775863171,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008209311775863171,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16368272602558137,
|
|
"signal/accuracy_reward/group_std_mean": 0.21843498945236206,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08184136301279069,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08184136301279069,
|
|
"signal/advantage_abs_mean": 0.09044176638126374,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09044176638126374,
|
|
"signal/advantage_pre_scale_std": 0.15314349234104158,
|
|
"signal/advantage_std": 0.15314349234104158,
|
|
"signal/brier_reward/centered_abs_mean": 0.12139843702316284,
|
|
"signal/brier_reward/group_std_mean": 0.162265807390213,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012139843590557576,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012139843590557576,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03472979925572872,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.053919277340173724,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003472979832440615,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003472979832440615,
|
|
"signal/format_reward/centered_abs_mean": 0.018885633535683156,
|
|
"signal/format_reward/group_std_mean": 0.03502344973385334,
|
|
"signal/format_reward/group_zero_std_frac": 0.8555555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009442816767841578,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009442816767841578,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002187176514416933,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004075382417067885,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.733970650297124e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.733970650297124e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009765653498470783,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012881954945623875,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000976565305609256,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000976565305609256,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33729991912841795,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40356319546699526,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033729993551969525,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033729993551969525,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16190140374322814,
|
|
"calibration/batch_distribution_entropy": 0.937421210212176,
|
|
"calibration/buffer_distribution_entropy": 0.9641604640658331,
|
|
"calibration/confidence_entropy": 0.4606058652211222,
|
|
"calibration/coverage@0%": 0.024603379429687745,
|
|
"calibration/coverage@1%": 0.024603379429687745,
|
|
"calibration/coverage@10%": 0.2731480414947021,
|
|
"calibration/coverage@15%": 0.5704934176134204,
|
|
"calibration/coverage@20%": 0.8450828196798226,
|
|
"calibration/coverage@25%": 0.913627509584862,
|
|
"calibration/coverage@30%": 0.9366492146596859,
|
|
"calibration/coverage@5%": 0.15475558298909925,
|
|
"calibration/ece": 0.16852490293275388,
|
|
"calibration/mean_confidence": 0.5998936936106494,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.008940972222222187,
|
|
"completions/max_length": 3640.0,
|
|
"completions/max_terminated_length": 3640.0,
|
|
"completions/mean_length": 731.548876953125,
|
|
"completions/mean_terminated_length": 738.14892578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 221.6,
|
|
"epoch": 0.44399445006937416,
|
|
"grad_norm": 0.00037856833660043776,
|
|
"learning_rate": 6.927710843373495e-07,
|
|
"loss": -0.0066,
|
|
"num_tokens": 406484845.0,
|
|
"reward": 1.000522756576538,
|
|
"reward_std": 0.12429632395505905,
|
|
"rewards/accgated_coverage_0": 0.02475869134068489,
|
|
"rewards/accgated_coverage_1": 0.02475869134068489,
|
|
"rewards/accgated_coverage_10": 0.024336008355021477,
|
|
"rewards/accgated_coverage_15": 0.02571437545120716,
|
|
"rewards/accgated_coverage_20": 0.05004611238837242,
|
|
"rewards/accgated_coverage_25": 0.09355643838644027,
|
|
"rewards/accgated_coverage_5": 0.024745855107903482,
|
|
"rewards/accuracy_reward": 0.6691840171813965,
|
|
"rewards/brier_reward": 0.8300941109657287,
|
|
"rewards/confidence_uniqueness_reward": 0.9367785811424255,
|
|
"rewards/format_reward": 0.9910590291023255,
|
|
"rewards/frontier_aurc_reward": -0.0013151126448065042,
|
|
"rewards/frontier_ece_reward": 0.0014089885400608182,
|
|
"rewards/frontier_entropy_batch_reward": -0.33202074766159057,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.08384153693914413,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.111553256213665,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008384153712540865,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008384153712540865,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.08384153693914413,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.111553256213665,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008384153712540865,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008384153712540865,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.06902736574411392,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.09257243126630783,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.006902736704796552,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.006902736704796552,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.030930518358945846,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.040364190191030505,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0030930519569665194,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0030930519569665194,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.03231954351067543,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.041446197777986526,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0032319542951881886,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0032319542951881886,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.051111014932394026,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.06704937815666198,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005111101549118757,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005111101549118757,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.08381979912519455,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.1115255281329155,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008381979819387198,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008381979819387198,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1699598550796509,
|
|
"signal/accuracy_reward/group_std_mean": 0.22352631986141205,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08497992753982545,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08497992753982545,
|
|
"signal/advantage_abs_mean": 0.09083193242549896,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09083193242549896,
|
|
"signal/advantage_pre_scale_std": 0.14602271616458892,
|
|
"signal/advantage_std": 0.14602271616458892,
|
|
"signal/brier_reward/centered_abs_mean": 0.12183561623096466,
|
|
"signal/brier_reward/group_std_mean": 0.1614099621772766,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01218356229364872,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01218356229364872,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028073800355196,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.046647604554891586,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028073799796402453,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028073799796402453,
|
|
"signal/format_reward/centered_abs_mean": 0.014816623367369174,
|
|
"signal/format_reward/group_std_mean": 0.030689219757914544,
|
|
"signal/format_reward/group_zero_std_frac": 0.8611111044883728,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007408311683684587,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007408311683684587,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015788348391652107,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0030495470855385065,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9735435853363014e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9735435853363014e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010178772546350957,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.013282094523310662,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010178772499784827,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010178772499784827,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3281850337982178,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3985232710838318,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032818502932786944,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032818502932786944,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.14453371099003726,
|
|
"calibration/batch_distribution_entropy": 0.9147580072034586,
|
|
"calibration/buffer_distribution_entropy": 0.9639425978438201,
|
|
"calibration/confidence_entropy": 0.4524898443515701,
|
|
"calibration/coverage@0%": 0.09274777052981203,
|
|
"calibration/coverage@1%": 0.12086448140514361,
|
|
"calibration/coverage@10%": 0.37908342004865136,
|
|
"calibration/coverage@15%": 0.5675797783755056,
|
|
"calibration/coverage@20%": 0.713385961637515,
|
|
"calibration/coverage@25%": 0.9190044798065162,
|
|
"calibration/coverage@30%": 0.9780470800524934,
|
|
"calibration/coverage@5%": 0.29242188738490793,
|
|
"calibration/ece": 0.14082698722784545,
|
|
"calibration/mean_confidence": 0.6366452158551367,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.007552083333333348,
|
|
"completions/max_length": 3550.4,
|
|
"completions/max_terminated_length": 3550.4,
|
|
"completions/mean_length": 719.5197021484375,
|
|
"completions/mean_terminated_length": 724.99267578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 219.2,
|
|
"epoch": 0.45599430007124914,
|
|
"grad_norm": 0.0003734733909368515,
|
|
"learning_rate": 5.421686746987952e-07,
|
|
"loss": -0.0063,
|
|
"num_tokens": 417856656.0,
|
|
"reward": 1.0197997450828553,
|
|
"reward_std": 0.1204053670167923,
|
|
"rewards/accgated_coverage_0": 0.01778254872187972,
|
|
"rewards/accgated_coverage_1": 0.01778254872187972,
|
|
"rewards/accgated_coverage_10": 0.0201628603041172,
|
|
"rewards/accgated_coverage_15": 0.029619522020220758,
|
|
"rewards/accgated_coverage_20": 0.06293513551354409,
|
|
"rewards/accgated_coverage_25": 0.11633996367454529,
|
|
"rewards/accgated_coverage_5": 0.017779755219817162,
|
|
"rewards/accuracy_reward": 0.7078993082046509,
|
|
"rewards/brier_reward": 0.8345678091049195,
|
|
"rewards/confidence_uniqueness_reward": 0.9368413925170899,
|
|
"rewards/format_reward": 0.9924479246139526,
|
|
"rewards/frontier_aurc_reward": -0.0012165130581706762,
|
|
"rewards/frontier_ece_reward": -0.00039820900419726966,
|
|
"rewards/frontier_entropy_batch_reward": -0.35700035095214844,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.09048043787479401,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.12144993394613265,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.009048044122755528,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.009048044122755528,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.09048043787479401,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.12144993394613265,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.009048044122755528,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.009048044122755528,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.06684454083442688,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.09066204130649566,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.006684453692287207,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.006684453692287207,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.03273606859147549,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.04233010783791542,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0032736069057136773,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0032736069057136773,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.03768849298357964,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.04803970232605934,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0037688495591282844,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0037688495591282844,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.05939576998353004,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.07729223221540452,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005939576961100102,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005939576961100102,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.09042053371667862,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.12137292772531509,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.009042053669691085,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.009042053669691085,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16521809995174408,
|
|
"signal/accuracy_reward/group_std_mean": 0.21986591517925264,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3638888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08260904997587204,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08260904997587204,
|
|
"signal/advantage_abs_mean": 0.08695598244667054,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08695598244667054,
|
|
"signal/advantage_pre_scale_std": 0.14129080772399902,
|
|
"signal/advantage_std": 0.14129080772399902,
|
|
"signal/brier_reward/centered_abs_mean": 0.11943345665931701,
|
|
"signal/brier_reward/group_std_mean": 0.15741896331310273,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011943346075713634,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011943346075713634,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027682187035679817,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04504421055316925,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002768218796700239,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002768218796700239,
|
|
"signal/format_reward/centered_abs_mean": 0.01363389752805233,
|
|
"signal/format_reward/group_std_mean": 0.028098611906170846,
|
|
"signal/format_reward/group_zero_std_frac": 0.8777777791023255,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006816948764026165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006816948764026165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013316195458173753,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022581091150641443,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6645244613755494e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6645244613755494e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010017194785177708,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012919113039970398,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010017195134423673,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010017195134423673,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3350070595741272,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40499748587608336,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033500705286860465,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033500705286860465,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.14915417266796935,
|
|
"calibration/batch_distribution_entropy": 0.9576361146093342,
|
|
"calibration/buffer_distribution_entropy": 0.9639255010791874,
|
|
"calibration/confidence_entropy": 0.4497387116426815,
|
|
"calibration/coverage@0%": 0.045690067003181546,
|
|
"calibration/coverage@1%": 0.10611787448981255,
|
|
"calibration/coverage@10%": 0.3900509318187466,
|
|
"calibration/coverage@15%": 0.4850762773690467,
|
|
"calibration/coverage@20%": 0.7097961256800407,
|
|
"calibration/coverage@25%": 0.8674547346009558,
|
|
"calibration/coverage@30%": 0.9342891171376101,
|
|
"calibration/coverage@5%": 0.2691564589599168,
|
|
"calibration/ece": 0.1618369751691793,
|
|
"calibration/mean_confidence": 0.5709995873050344,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013020833333333325,
|
|
"completions/max_length": 3631.6,
|
|
"completions/max_terminated_length": 3631.6,
|
|
"completions/mean_length": 740.03681640625,
|
|
"completions/mean_terminated_length": 749.9174682617188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 210.4,
|
|
"epoch": 0.46799415007312406,
|
|
"grad_norm": 0.00039503860170952976,
|
|
"learning_rate": 3.91566265060241e-07,
|
|
"loss": -0.0122,
|
|
"num_tokens": 429462744.0,
|
|
"reward": 0.9920111894607544,
|
|
"reward_std": 0.13194906413555146,
|
|
"rewards/accgated_coverage_0": 0.019916841574013234,
|
|
"rewards/accgated_coverage_1": 0.019916841574013234,
|
|
"rewards/accgated_coverage_10": 0.0193513598293066,
|
|
"rewards/accgated_coverage_15": 0.026907961070537566,
|
|
"rewards/accgated_coverage_20": 0.053805211931467055,
|
|
"rewards/accgated_coverage_25": 0.09571786969900131,
|
|
"rewards/accgated_coverage_5": 0.019920169189572335,
|
|
"rewards/accuracy_reward": 0.6619791626930237,
|
|
"rewards/brier_reward": 0.815404748916626,
|
|
"rewards/confidence_uniqueness_reward": 0.932914924621582,
|
|
"rewards/format_reward": 0.9869791626930237,
|
|
"rewards/frontier_aurc_reward": -0.0016872843028977514,
|
|
"rewards/frontier_ece_reward": 0.0004102005223103333,
|
|
"rewards/frontier_entropy_batch_reward": -0.32873486876487734,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.0784646600484848,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.10561068952083588,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007846465986222028,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007846465986222028,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.0784646600484848,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.10561068952083588,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007846465986222028,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007846465986222028,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.05457337722182274,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.07424464225769042,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005457338038831949,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005457338038831949,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.028437989950180053,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.036998636275529864,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0028437990695238113,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0028437990695238113,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.034603772684931755,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.044347959011793135,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0034603772219270468,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0034603772219270468,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.05456459298729897,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.07072983086109161,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005456459615379572,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005456459615379572,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.07840372771024703,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.10553145706653595,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007840372994542122,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007840372994542122,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15810546576976775,
|
|
"signal/accuracy_reward/group_std_mean": 0.21627101302146912,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.36111111640930177,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07905273288488388,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07905273288488388,
|
|
"signal/advantage_abs_mean": 0.09530858844518661,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09530858844518661,
|
|
"signal/advantage_pre_scale_std": 0.15653879344463348,
|
|
"signal/advantage_std": 0.15653879344463348,
|
|
"signal/brier_reward/centered_abs_mean": 0.12907694429159164,
|
|
"signal/brier_reward/group_std_mean": 0.1693983793258667,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012907694093883038,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012907694093883038,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.034757498651742935,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05477444678544998,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003475749958306551,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003475749958306551,
|
|
"signal/format_reward/centered_abs_mean": 0.0216796875,
|
|
"signal/format_reward/group_std_mean": 0.039159010723233224,
|
|
"signal/format_reward/group_zero_std_frac": 0.8444444417953492,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01083984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01083984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001960353879258037,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0034777455497533084,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4504425164195708e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4504425164195708e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009798597171902656,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012810107320547104,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009798598010092973,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009798598010092973,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32911902070045473,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40096608400344846,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03291190341114998,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03291190341114998,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.11813731305579875,
|
|
"calibration/batch_distribution_entropy": 0.9250420952698768,
|
|
"calibration/buffer_distribution_entropy": 0.9640337582651292,
|
|
"calibration/confidence_entropy": 0.47253733829761435,
|
|
"calibration/coverage@0%": 0.046461767915647115,
|
|
"calibration/coverage@1%": 0.046461767915647115,
|
|
"calibration/coverage@10%": 0.5523816210957633,
|
|
"calibration/coverage@15%": 0.6547474620660765,
|
|
"calibration/coverage@20%": 0.9090805045019799,
|
|
"calibration/coverage@25%": 0.9398860165794065,
|
|
"calibration/coverage@30%": 0.9665166884816754,
|
|
"calibration/coverage@5%": 0.35775492558328736,
|
|
"calibration/ece": 0.1328041371310841,
|
|
"calibration/mean_confidence": 0.6450623844795544,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.007552083333333304,
|
|
"completions/max_length": 3451.2,
|
|
"completions/max_terminated_length": 3451.2,
|
|
"completions/mean_length": 720.2349853515625,
|
|
"completions/mean_terminated_length": 725.751220703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 213.0,
|
|
"epoch": 0.47999400007499904,
|
|
"grad_norm": 0.00045016928925178945,
|
|
"learning_rate": 2.409638554216868e-07,
|
|
"loss": -0.0057,
|
|
"num_tokens": 440827659.0,
|
|
"reward": 1.0121694087982178,
|
|
"reward_std": 0.12391498684883118,
|
|
"rewards/accgated_coverage_0": 0.017262194491922855,
|
|
"rewards/accgated_coverage_1": 0.017262194491922855,
|
|
"rewards/accgated_coverage_10": 0.01849367544054985,
|
|
"rewards/accgated_coverage_15": 0.029358771443367005,
|
|
"rewards/accgated_coverage_20": 0.06178856343030929,
|
|
"rewards/accgated_coverage_25": 0.10975199192762375,
|
|
"rewards/accgated_coverage_5": 0.017268973495811225,
|
|
"rewards/accuracy_reward": 0.6933159708976746,
|
|
"rewards/brier_reward": 0.8302036046981811,
|
|
"rewards/confidence_uniqueness_reward": 0.93699049949646,
|
|
"rewards/format_reward": 0.9922743082046509,
|
|
"rewards/frontier_aurc_reward": -0.0020338458009064196,
|
|
"rewards/frontier_ece_reward": -0.0003850290362606756,
|
|
"rewards/frontier_entropy_batch_reward": -0.3439982354640961,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.07879135310649872,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.10471928268671035,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.007879135478287936,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.007879135478287936,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.07879135310649872,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.10471928268671035,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.007879135478287936,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.007879135478287936,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.05094265937805176,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.06902378126978874,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005094265658408403,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005094265658408403,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.028977422043681145,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.03736466318368912,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.002897742437198758,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.002897742437198758,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.038806602358818054,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.04972253888845444,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0038806602358818056,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0038806602358818056,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.06239664033055305,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.08014876991510392,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.00623966408893466,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.00623966408893466,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.0787682592868805,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.10468966215848922,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.007876825984567404,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.007876825984567404,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1601508229970932,
|
|
"signal/accuracy_reward/group_std_mean": 0.2075590342283249,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0800754114985466,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0800754114985466,
|
|
"signal/advantage_abs_mean": 0.09237567484378814,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09237567484378814,
|
|
"signal/advantage_pre_scale_std": 0.14858520925045013,
|
|
"signal/advantage_std": 0.14858520925045013,
|
|
"signal/brier_reward/centered_abs_mean": 0.11585159003734588,
|
|
"signal/brier_reward/group_std_mean": 0.15270988047122955,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01158515941351652,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01158515941351652,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027825209125876427,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04609650820493698,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002782521024346352,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002782521024346352,
|
|
"signal/format_reward/centered_abs_mean": 0.01366644985973835,
|
|
"signal/format_reward/group_std_mean": 0.029148318618535996,
|
|
"signal/format_reward/group_zero_std_frac": 0.8666666746139526,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006833224929869175,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006833224929869175,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002534387307241559,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0048497423063963655,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.167984214087482e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.167984214087482e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009504923969507218,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012250457704067231,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009504924179054797,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009504924179054797,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32950940132141116,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39927846789360044,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032950940728187564,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032950940728187564,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.47999400007499904,
|
|
"eval_calibration/aurc": 0.1088882915120361,
|
|
"eval_calibration/batch_distribution_entropy": 0.884755325790068,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9641450900696295,
|
|
"eval_calibration/confidence_entropy": 0.4517086267080035,
|
|
"eval_calibration/coverage@0%": 0.3333333333333333,
|
|
"eval_calibration/coverage@1%": 0.3333333333333333,
|
|
"eval_calibration/coverage@10%": 0.59375,
|
|
"eval_calibration/coverage@15%": 0.6979166666666666,
|
|
"eval_calibration/coverage@20%": 0.8489583333333334,
|
|
"eval_calibration/coverage@25%": 0.9114583333333334,
|
|
"eval_calibration/coverage@30%": 0.9791666666666666,
|
|
"eval_calibration/coverage@5%": 0.34375,
|
|
"eval_calibration/ece": 0.18822067071656992,
|
|
"eval_calibration/mean_confidence": 0.6140182765846043,
|
|
"eval_completions/clipped_ratio": 0.010416666666666666,
|
|
"eval_completions/max_length": 2482.0,
|
|
"eval_completions/max_terminated_length": 2482.0,
|
|
"eval_completions/mean_length": 720.477793375651,
|
|
"eval_completions/mean_terminated_length": 728.0352681477865,
|
|
"eval_completions/min_length": 57.333333333333336,
|
|
"eval_completions/min_terminated_length": 251.16666666666666,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 440827659.0,
|
|
"eval_reward": 0.9302136798699697,
|
|
"eval_reward_std": 0.232889657219251,
|
|
"eval_rewards/accgated_coverage_0": 0.021035971275220316,
|
|
"eval_rewards/accgated_coverage_1": 0.021035971275220316,
|
|
"eval_rewards/accgated_coverage_10": 0.020410844823345542,
|
|
"eval_rewards/accgated_coverage_15": 0.029529539868235588,
|
|
"eval_rewards/accgated_coverage_20": 0.05993118633826574,
|
|
"eval_rewards/accgated_coverage_25": 0.10444261009494464,
|
|
"eval_rewards/accgated_coverage_5": 0.021037622820585966,
|
|
"eval_rewards/accuracy_reward": 0.6718750099341074,
|
|
"eval_rewards/brier_reward": 0.8273646434148153,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8833588063716888,
|
|
"eval_rewards/format_reward": 0.988715281089147,
|
|
"eval_rewards/frontier_aurc_reward": -0.0017877276889824618,
|
|
"eval_rewards/frontier_ece_reward": -2.3216848300459485e-05,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.988715281089147,
|
|
"eval_runtime": 205.9088,
|
|
"eval_samples_per_second": 4.857,
|
|
"eval_signal/accgated_coverage_0/centered_abs_mean": 0.12559553111592928,
|
|
"eval_signal/accgated_coverage_0/group_std_mean": 0.17820352067550024,
|
|
"eval_signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.01255955391873916,
|
|
"eval_signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_0/weighted_centered_abs_mean": 0.01255955391873916,
|
|
"eval_signal/accgated_coverage_1/centered_abs_mean": 0.12559553111592928,
|
|
"eval_signal/accgated_coverage_1/group_std_mean": 0.17820352067550024,
|
|
"eval_signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.01255955391873916,
|
|
"eval_signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_1/weighted_centered_abs_mean": 0.01255955391873916,
|
|
"eval_signal/accgated_coverage_10/centered_abs_mean": 0.07524273234109084,
|
|
"eval_signal/accgated_coverage_10/group_std_mean": 0.11235688626766205,
|
|
"eval_signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.007524273591116071,
|
|
"eval_signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_10/weighted_centered_abs_mean": 0.007524273591116071,
|
|
"eval_signal/accgated_coverage_15/centered_abs_mean": 0.039298239474495254,
|
|
"eval_signal/accgated_coverage_15/group_std_mean": 0.049893214677770935,
|
|
"eval_signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.003929823908644418,
|
|
"eval_signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_15/weighted_centered_abs_mean": 0.003929823908644418,
|
|
"eval_signal/accgated_coverage_20/centered_abs_mean": 0.06626473863919576,
|
|
"eval_signal/accgated_coverage_20/group_std_mean": 0.08086467534303665,
|
|
"eval_signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0066264736621330185,
|
|
"eval_signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0066264736621330185,
|
|
"eval_signal/accgated_coverage_25/centered_abs_mean": 0.11843342582384746,
|
|
"eval_signal/accgated_coverage_25/group_std_mean": 0.14078539858261743,
|
|
"eval_signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.011843343110134205,
|
|
"eval_signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_25/weighted_centered_abs_mean": 0.011843343110134205,
|
|
"eval_signal/accgated_coverage_5/centered_abs_mean": 0.12558058152596155,
|
|
"eval_signal/accgated_coverage_5/group_std_mean": 0.17818409701188406,
|
|
"eval_signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.012558058214684328,
|
|
"eval_signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/accgated_coverage_5/weighted_centered_abs_mean": 0.012558058214684328,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4229600677887599,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4658859223127365,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21148003389437994,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21148003389437994,
|
|
"eval_signal/advantage_abs_mean": 0.19910976042350134,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.19910976042350134,
|
|
"eval_signal/advantage_pre_scale_std": 0.23237022509177527,
|
|
"eval_signal/advantage_std": 0.23237022509177527,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.17643060783545175,
|
|
"eval_signal/brier_reward/group_std_mean": 0.23687549928824106,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01764306053519249,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01764306053519249,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0549784650405248,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08964706336458524,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005497846674794952,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005497846674794952,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.021647135416666668,
|
|
"eval_signal/format_reward/group_std_mean": 0.057857211058338485,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.6944444676240286,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010823567708333334,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.010823567708333334,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0030125895670304694,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.006865158909931779,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.765737180098464e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.765737180098464e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.013848148131122192,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.01800649023304383,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001384814813112219,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001384814813112219,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.021647135416666668,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.057857211058338485,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6944444676240286,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0021647136115158596,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0021647136115158596,
|
|
"eval_steps_per_second": 0.029,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16183124767173385,
|
|
"calibration/batch_distribution_entropy": 0.903965862936228,
|
|
"calibration/buffer_distribution_entropy": 0.9639420109475438,
|
|
"calibration/confidence_entropy": 0.4274784320917583,
|
|
"calibration/coverage@0%": 0.04289515021782449,
|
|
"calibration/coverage@1%": 0.04289515021782449,
|
|
"calibration/coverage@10%": 0.31463650649209207,
|
|
"calibration/coverage@15%": 0.5353159409957227,
|
|
"calibration/coverage@20%": 0.8229195440318524,
|
|
"calibration/coverage@25%": 0.8926666046296227,
|
|
"calibration/coverage@30%": 0.9345367077157132,
|
|
"calibration/coverage@5%": 0.12416531949199486,
|
|
"calibration/ece": 0.11350287813588537,
|
|
"calibration/mean_confidence": 0.6439586636148722,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009114583333333325,
|
|
"completions/max_length": 3574.4,
|
|
"completions/max_terminated_length": 3574.4,
|
|
"completions/mean_length": 734.712060546875,
|
|
"completions/mean_terminated_length": 741.4294067382813,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 207.0,
|
|
"epoch": 0.491993850076874,
|
|
"grad_norm": 0.00032840637140907347,
|
|
"learning_rate": 9.036144578313253e-08,
|
|
"loss": -0.0075,
|
|
"num_tokens": 452357494.0,
|
|
"reward": 1.0280473828315735,
|
|
"reward_std": 0.12178252339363098,
|
|
"rewards/accgated_coverage_0": 0.010969918034970761,
|
|
"rewards/accgated_coverage_1": 0.010969918034970761,
|
|
"rewards/accgated_coverage_10": 0.016975909285247327,
|
|
"rewards/accgated_coverage_15": 0.0357561755925417,
|
|
"rewards/accgated_coverage_20": 0.07814027667045594,
|
|
"rewards/accgated_coverage_25": 0.13613629937171937,
|
|
"rewards/accgated_coverage_5": 0.010972013510763646,
|
|
"rewards/accuracy_reward": 0.7239583373069763,
|
|
"rewards/brier_reward": 0.8324974060058594,
|
|
"rewards/confidence_uniqueness_reward": 0.9338359713554383,
|
|
"rewards/format_reward": 0.9907986164093018,
|
|
"rewards/frontier_aurc_reward": -0.0017422198783606292,
|
|
"rewards/frontier_ece_reward": -0.0017038072284776718,
|
|
"rewards/frontier_entropy_batch_reward": -0.35764376521110536,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.09229595065116883,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.12302704006433487,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.009229595586657525,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.009229595586657525,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.09229595065116883,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.12302704006433487,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.009229595586657525,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.009229595586657525,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.056336633116006854,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.07609608769416809,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005633663292974234,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005633663292974234,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.03357893191277981,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.042671628296375275,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.00335789336822927,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.00335789336822927,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.04358198344707489,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.0558698907494545,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.004358198214322329,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.004358198214322329,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.0666369266808033,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.0865581214427948,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.006663692649453878,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.006663692649453878,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.09228282570838928,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.12301015555858612,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.009228283166885376,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.009228283166885376,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1586371511220932,
|
|
"signal/accuracy_reward/group_std_mean": 0.21199294328689575,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0793185755610466,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0793185755610466,
|
|
"signal/advantage_abs_mean": 0.08658735156059265,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08658735156059265,
|
|
"signal/advantage_pre_scale_std": 0.145833295583725,
|
|
"signal/advantage_std": 0.145833295583725,
|
|
"signal/brier_reward/centered_abs_mean": 0.11947631686925889,
|
|
"signal/brier_reward/group_std_mean": 0.15880028307437896,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0119476318359375,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0119476318359375,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03140333443880081,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05210669934749603,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031403335742652416,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031403335742652416,
|
|
"signal/format_reward/centered_abs_mean": 0.016384548787027598,
|
|
"signal/format_reward/group_std_mean": 0.03408227376639843,
|
|
"signal/format_reward/group_zero_std_frac": 0.8472222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008192274393513799,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008192274393513799,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019668075372464957,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035003958269953727,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.458509425196098e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.458509425196098e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009487048350274562,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012332708947360516,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009487048489972949,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009487048489972949,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33413779735565186,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4035483181476593,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033413780853152275,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033413780853152275,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.08513920603973812,
|
|
"calibration/batch_distribution_entropy": 0.9141002430834693,
|
|
"calibration/buffer_distribution_entropy": 0.9630652371634193,
|
|
"calibration/confidence_entropy": 0.4457588800263559,
|
|
"calibration/coverage@0%": 0.0602074695165456,
|
|
"calibration/coverage@1%": 0.22643897517363867,
|
|
"calibration/coverage@10%": 0.6904621196478132,
|
|
"calibration/coverage@15%": 0.8459882447770486,
|
|
"calibration/coverage@20%": 0.9142371635896023,
|
|
"calibration/coverage@25%": 0.9719849361623955,
|
|
"calibration/coverage@30%": 1.0,
|
|
"calibration/coverage@5%": 0.4957522217913381,
|
|
"calibration/ece": 0.11242408581979568,
|
|
"calibration/mean_confidence": 0.6651221303123294,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.006510416666666667,
|
|
"completions/max_length": 3355.6666666666665,
|
|
"completions/max_terminated_length": 3355.6666666666665,
|
|
"completions/mean_length": 733.4699096679688,
|
|
"completions/mean_terminated_length": 738.4192504882812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 219.66666666666666,
|
|
"epoch": 0.49919376007799904,
|
|
"num_tokens": 459293286.0,
|
|
"reward": 1.0133548974990845,
|
|
"reward_std": 0.12018975863854091,
|
|
"rewards/accgated_coverage_0": 0.014022265560925007,
|
|
"rewards/accgated_coverage_1": 0.014022265560925007,
|
|
"rewards/accgated_coverage_10": 0.01640247491498788,
|
|
"rewards/accgated_coverage_15": 0.03245990971724192,
|
|
"rewards/accgated_coverage_20": 0.06980260213216145,
|
|
"rewards/accgated_coverage_25": 0.12002388884623845,
|
|
"rewards/accgated_coverage_5": 0.014022964673737684,
|
|
"rewards/accuracy_reward": 0.6903935273488363,
|
|
"rewards/brier_reward": 0.8296072085698446,
|
|
"rewards/confidence_uniqueness_reward": 0.9375643134117126,
|
|
"rewards/format_reward": 0.9934896032015482,
|
|
"rewards/frontier_aurc_reward": -0.001204290989941607,
|
|
"rewards/frontier_ece_reward": -0.00047483538219239563,
|
|
"rewards/frontier_entropy_batch_reward": -0.3331694006919861,
|
|
"signal/accgated_coverage_0/centered_abs_mean": 0.08532695472240448,
|
|
"signal/accgated_coverage_0/group_std_mean": 0.11210805177688599,
|
|
"signal/accgated_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_0/scaled_weighted_centered_abs_mean": 0.008532696248342594,
|
|
"signal/accgated_coverage_0/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_0/weighted_centered_abs_mean": 0.008532696248342594,
|
|
"signal/accgated_coverage_1/centered_abs_mean": 0.08532695472240448,
|
|
"signal/accgated_coverage_1/group_std_mean": 0.11210805177688599,
|
|
"signal/accgated_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_1/scaled_weighted_centered_abs_mean": 0.008532696248342594,
|
|
"signal/accgated_coverage_1/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_1/weighted_centered_abs_mean": 0.008532696248342594,
|
|
"signal/accgated_coverage_10/centered_abs_mean": 0.05023227507869402,
|
|
"signal/accgated_coverage_10/group_std_mean": 0.0667143886288007,
|
|
"signal/accgated_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_10/scaled_weighted_centered_abs_mean": 0.005023227694133918,
|
|
"signal/accgated_coverage_10/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_10/weighted_centered_abs_mean": 0.005023227694133918,
|
|
"signal/accgated_coverage_15/centered_abs_mean": 0.03077574260532856,
|
|
"signal/accgated_coverage_15/group_std_mean": 0.039111041774352394,
|
|
"signal/accgated_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_15/scaled_weighted_centered_abs_mean": 0.0030775743070989847,
|
|
"signal/accgated_coverage_15/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_15/weighted_centered_abs_mean": 0.0030775743070989847,
|
|
"signal/accgated_coverage_20/centered_abs_mean": 0.03875031570593516,
|
|
"signal/accgated_coverage_20/group_std_mean": 0.04919542744755745,
|
|
"signal/accgated_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_20/scaled_weighted_centered_abs_mean": 0.0038750318344682455,
|
|
"signal/accgated_coverage_20/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_20/weighted_centered_abs_mean": 0.0038750318344682455,
|
|
"signal/accgated_coverage_25/centered_abs_mean": 0.05905377368132273,
|
|
"signal/accgated_coverage_25/group_std_mean": 0.07615283379952113,
|
|
"signal/accgated_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_25/scaled_weighted_centered_abs_mean": 0.005905377523352702,
|
|
"signal/accgated_coverage_25/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_25/weighted_centered_abs_mean": 0.005905377523352702,
|
|
"signal/accgated_coverage_5/centered_abs_mean": 0.08531387398640315,
|
|
"signal/accgated_coverage_5/group_std_mean": 0.11209150652090709,
|
|
"signal/accgated_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/accgated_coverage_5/scaled_weighted_centered_abs_mean": 0.008531387584904829,
|
|
"signal/accgated_coverage_5/weight": 0.10000000149011612,
|
|
"signal/accgated_coverage_5/weighted_centered_abs_mean": 0.008531387584904829,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.162398728231589,
|
|
"signal/accuracy_reward/group_std_mean": 0.21247334778308868,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4027777910232544,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0811993641157945,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0811993641157945,
|
|
"signal/advantage_abs_mean": 0.08777189254760742,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08777189254760742,
|
|
"signal/advantage_pre_scale_std": 0.14062808950742087,
|
|
"signal/advantage_std": 0.14062808950742087,
|
|
"signal/brier_reward/centered_abs_mean": 0.11930795510609944,
|
|
"signal/brier_reward/group_std_mean": 0.15778929988543192,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011930795386433601,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011930795386433601,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026650328810016315,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04459113130966822,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002665032943089803,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002665032943089803,
|
|
"signal/format_reward/centered_abs_mean": 0.011962890314559141,
|
|
"signal/format_reward/group_std_mean": 0.02671019857128461,
|
|
"signal/format_reward/group_zero_std_frac": 0.875000019868215,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005981445157279571,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005981445157279571,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013815810283025105,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0024687413048620024,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.72697618836537e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.72697618836537e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010010889731347561,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01289159276833137,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001001088957612713,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001001088957612713,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3178635636965434,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.389106810092926,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031786357363065086,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031786357363065086,
|
|
"step": 208,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.009436613047280563,
|
|
"train_runtime": 40838.7093,
|
|
"train_samples_per_second": 0.367,
|
|
"train_steps_per_second": 0.005
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 208,
|
|
"num_input_tokens_seen": 459293286,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 6,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|