8566 lines
521 KiB
JSON
8566 lines
521 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9984,
|
|
"eval_steps": 50,
|
|
"global_step": 312,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.6250458868239556,
|
|
"calibration/batch_distribution_entropy": 0.6536619016238594,
|
|
"calibration/confidence_entropy": 0.3506516141955464,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.48844069501450277,
|
|
"calibration/mean_confidence": 0.7871118547273157,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03759765625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1517.0,
|
|
"completions/mean_length": 272.25546875,
|
|
"completions/mean_terminated_length": 222.88035583496094,
|
|
"completions/min_length": 2.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.16206742823123932,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"loss": 0.0938,
|
|
"num_tokens": 17631928.0,
|
|
"reward": 0.6381880164146423,
|
|
"reward_std": 0.4766306221485138,
|
|
"rewards/accuracy_reward": 0.22021484375,
|
|
"rewards/brier_reward": 0.37847437858581545,
|
|
"rewards/confidence_uniqueness_reward": 0.4914248585700989,
|
|
"rewards/format_reward": 0.6865234375,
|
|
"rewards/frontier_aurc_reward": 0.3039612889289856,
|
|
"rewards/frontier_coverage_1": 0.3039612889289856,
|
|
"rewards/frontier_coverage_10": 0.3039612889289856,
|
|
"rewards/frontier_coverage_15": 0.3039612889289856,
|
|
"rewards/frontier_coverage_20": 0.3039612889289856,
|
|
"rewards/frontier_coverage_25": 0.3039612889289856,
|
|
"rewards/frontier_coverage_5": 0.3039612889289856,
|
|
"rewards/frontier_ece_reward": 0.3039612889289856,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.241912841796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.2817725300788879,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.325,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1209564208984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1209564208984375,
|
|
"signal/advantage_abs_mean": 0.40599689483642576,
|
|
"signal/advantage_pre_scale_abs_mean": 0.40599689483642576,
|
|
"signal/advantage_pre_scale_std": 0.48591635227203367,
|
|
"signal/advantage_std": 0.48591635227203367,
|
|
"signal/brier_reward/centered_abs_mean": 0.32122361063957217,
|
|
"signal/brier_reward/group_std_mean": 0.36595953106880186,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04015295132994652,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.04015295132994652,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2976445615291595,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.3497284233570099,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03720557019114494,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03720557019114494,
|
|
"signal/format_reward/centered_abs_mean": 0.40189208984375,
|
|
"signal/format_reward/group_std_mean": 0.4530576765537262,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.200946044921875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.200946044921875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.2956218898296356,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3452231645584106,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005291631631553173,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005291631631553173,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2956218898296356,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3452231645584106,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005291631631553173,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005291631631553173,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2956218898296356,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3452231645584106,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005291631631553173,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005291631631553173,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2956218898296356,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3452231645584106,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005291631631553173,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005291631631553173,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2956218898296356,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3452231645584106,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005291631631553173,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005291631631553173,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2956218898296356,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3452231645584106,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005291631631553173,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005291631631553173,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2956218898296356,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3452231645584106,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005291631631553173,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005291631631553173,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.2956218898296356,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3452231645584106,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03695273622870445,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03695273622870445,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6812939989066674,
|
|
"calibration/batch_distribution_entropy": 0.651988259109008,
|
|
"calibration/confidence_entropy": 0.3465823907555327,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.5245675857124956,
|
|
"calibration/mean_confidence": 0.7919402249592102,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0349609375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1516.4,
|
|
"completions/mean_length": 260.02294921875,
|
|
"completions/mean_terminated_length": 213.81815490722656,
|
|
"completions/min_length": 2.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.052768442779779434,
|
|
"learning_rate": 6.249999999999999e-07,
|
|
"loss": 0.0933,
|
|
"num_tokens": 35394915.0,
|
|
"reward": 0.6551937222480774,
|
|
"reward_std": 0.44631595611572267,
|
|
"rewards/accuracy_reward": 0.21103515625,
|
|
"rewards/brier_reward": 0.3817343056201935,
|
|
"rewards/confidence_uniqueness_reward": 0.5185160636901855,
|
|
"rewards/format_reward": 0.72353515625,
|
|
"rewards/frontier_aurc_reward": 0.30114771127700807,
|
|
"rewards/frontier_coverage_1": 0.30114771127700807,
|
|
"rewards/frontier_coverage_10": 0.30114771127700807,
|
|
"rewards/frontier_coverage_15": 0.30114771127700807,
|
|
"rewards/frontier_coverage_20": 0.30114771127700807,
|
|
"rewards/frontier_coverage_25": 0.30114771127700807,
|
|
"rewards/frontier_coverage_5": 0.30114771127700807,
|
|
"rewards/frontier_ece_reward": 0.30114771127700807,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.218841552734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.26480883955955503,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.334375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1094207763671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1094207763671875,
|
|
"signal/advantage_abs_mean": 0.36836239099502566,
|
|
"signal/advantage_pre_scale_abs_mean": 0.36836239099502566,
|
|
"signal/advantage_pre_scale_std": 0.45523207187652587,
|
|
"signal/advantage_std": 0.45523207187652587,
|
|
"signal/brier_reward/centered_abs_mean": 0.30253963470458983,
|
|
"signal/brier_reward/group_std_mean": 0.35128949880599974,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03781745433807373,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03781745433807373,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2798932909965515,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.3376554548740387,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03498666137456894,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03498666137456894,
|
|
"signal/format_reward/centered_abs_mean": 0.368145751953125,
|
|
"signal/format_reward/group_std_mean": 0.4317262291908264,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1840728759765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1840728759765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.2753797650337219,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3300995469093323,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.004929297603666782,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.004929297603666782,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2753797650337219,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3300995469093323,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004929297603666782,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004929297603666782,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2753797650337219,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3300995469093323,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004929297603666782,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004929297603666782,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2753797650337219,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3300995469093323,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004929297603666782,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004929297603666782,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2753797650337219,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3300995469093323,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004929297603666782,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004929297603666782,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2753797650337219,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3300995469093323,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004929297603666782,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004929297603666782,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2753797650337219,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3300995469093323,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004929297603666782,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004929297603666782,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.2753797650337219,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3300995469093323,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03442247062921524,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03442247062921524,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5846727034247318,
|
|
"calibration/batch_distribution_entropy": 0.6331557206234091,
|
|
"calibration/buffer_distribution_entropy": 0.6631927066094524,
|
|
"calibration/confidence_entropy": 0.34227047283975975,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.47909140338947587,
|
|
"calibration/mean_confidence": 0.8098621162720414,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01513671875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1490.8,
|
|
"completions/mean_length": 196.65634765625,
|
|
"completions/mean_terminated_length": 176.16664123535156,
|
|
"completions/min_length": 12.4,
|
|
"completions/min_terminated_length": 12.4,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.03570015728473663,
|
|
"learning_rate": 9.374999999999999e-07,
|
|
"loss": 0.0544,
|
|
"num_tokens": 52457412.0,
|
|
"reward": 0.7937239408493042,
|
|
"reward_std": 0.3412171393632889,
|
|
"rewards/accuracy_reward": 0.26875,
|
|
"rewards/brier_reward": 0.48464107513427734,
|
|
"rewards/confidence_uniqueness_reward": 0.6457258105278015,
|
|
"rewards/format_reward": 0.88681640625,
|
|
"rewards/frontier_aurc_reward": 0.2983596006408334,
|
|
"rewards/frontier_coverage_1": 0.3137420117855072,
|
|
"rewards/frontier_coverage_10": 0.3137420117855072,
|
|
"rewards/frontier_coverage_15": 0.3137420117855072,
|
|
"rewards/frontier_coverage_20": 0.3137420117855072,
|
|
"rewards/frontier_coverage_25": 0.3137420117855072,
|
|
"rewards/frontier_coverage_5": 0.3137420117855072,
|
|
"rewards/frontier_ece_reward": 0.2848668903112411,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1978515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.24534497857093812,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.359375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09892578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09892578125,
|
|
"signal/advantage_abs_mean": 0.25668698251247407,
|
|
"signal/advantage_pre_scale_abs_mean": 0.25668698251247407,
|
|
"signal/advantage_pre_scale_std": 0.35499538779258727,
|
|
"signal/advantage_std": 0.35499538779258727,
|
|
"signal/brier_reward/centered_abs_mean": 0.26803810596466066,
|
|
"signal/brier_reward/group_std_mean": 0.3223264396190643,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03350476324558258,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03350476324558258,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.19349431693553926,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.25684032440185545,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024186789616942407,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.024186789616942407,
|
|
"signal/format_reward/centered_abs_mean": 0.186651611328125,
|
|
"signal/format_reward/group_std_mean": 0.285383003950119,
|
|
"signal/format_reward/group_zero_std_frac": 0.084375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0933258056640625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0933258056640625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.21558147557079793,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.26004184521734713,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0038589084782870487,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0038589084782870487,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.23389289379119874,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.28874107003211974,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004186682868748903,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004186682868748903,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23389289379119874,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.28874107003211974,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004186682868748903,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004186682868748903,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.23389289379119874,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.28874107003211974,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004186682868748903,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004186682868748903,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.23389289379119874,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.28874107003211974,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004186682868748903,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004186682868748903,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.23389289379119874,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.28874107003211974,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004186682868748903,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004186682868748903,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.23389289379119874,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.28874107003211974,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004186682868748903,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004186682868748903,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.23874643296003342,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.28933488130569457,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.029843304120004178,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.029843304120004178,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5276057163911909,
|
|
"calibration/batch_distribution_entropy": 0.6998150529760003,
|
|
"calibration/buffer_distribution_entropy": 0.6600571871120245,
|
|
"calibration/confidence_entropy": 0.3813088817990363,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.36694634126724984,
|
|
"calibration/mean_confidence": 0.7755121386863615,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.003515625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1040.4,
|
|
"completions/mean_length": 141.4296875,
|
|
"completions/mean_terminated_length": 136.518115234375,
|
|
"completions/min_length": 27.8,
|
|
"completions/min_terminated_length": 27.8,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.01592865027487278,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.011,
|
|
"num_tokens": 68824052.0,
|
|
"reward": 0.8271668314933777,
|
|
"reward_std": 0.21373497247695922,
|
|
"rewards/accuracy_reward": 0.3455078125,
|
|
"rewards/brier_reward": 0.5776345729827881,
|
|
"rewards/confidence_uniqueness_reward": 0.7443219065666199,
|
|
"rewards/format_reward": 0.97890625,
|
|
"rewards/frontier_aurc_reward": -0.0066505827009677885,
|
|
"rewards/frontier_coverage_1": 0.06369199305772781,
|
|
"rewards/frontier_coverage_10": 0.06369199305772781,
|
|
"rewards/frontier_coverage_15": 0.06369199305772781,
|
|
"rewards/frontier_coverage_20": 0.06369199305772781,
|
|
"rewards/frontier_coverage_25": 0.06369199305772781,
|
|
"rewards/frontier_coverage_5": 0.06369199305772781,
|
|
"rewards/frontier_ece_reward": -0.05605001614894718,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.20001220703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.2515187919139862,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.100006103515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.100006103515625,
|
|
"signal/advantage_abs_mean": 0.16012502908706666,
|
|
"signal/advantage_pre_scale_abs_mean": 0.16012502908706666,
|
|
"signal/advantage_pre_scale_std": 0.2318114757537842,
|
|
"signal/advantage_std": 0.2318114757537842,
|
|
"signal/brier_reward/centered_abs_mean": 0.23968503773212432,
|
|
"signal/brier_reward/group_std_mean": 0.2950502038002014,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02996062971651554,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02996062971651554,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1224755346775055,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.15922289788722993,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015309441834688187,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015309441834688187,
|
|
"signal/format_reward/centered_abs_mean": 0.03966064453125,
|
|
"signal/format_reward/group_std_mean": 0.09460565596818923,
|
|
"signal/format_reward/group_zero_std_frac": 0.540625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.019830322265625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.019830322265625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004445481114089489,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0060803060419857505,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.957410998642445e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.957410998642445e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10291247516870498,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15976795852184295,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018421332584694027,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018421332584694027,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10291247516870498,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15976795852184295,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018421332584694027,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018421332584694027,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10291247516870498,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15976795852184295,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018421332584694027,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018421332584694027,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10291247516870498,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.15976795852184295,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018421332584694027,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018421332584694027,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10291247516870498,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15976795852184295,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018421332584694027,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018421332584694027,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10291247516870498,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15976795852184295,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018421332584694027,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018421332584694027,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.1306297332048416,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.16125866770744324,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0163287166506052,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0163287166506052,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6060577192384174,
|
|
"calibration/batch_distribution_entropy": 0.7973205111041336,
|
|
"calibration/buffer_distribution_entropy": 0.6869138037888193,
|
|
"calibration/confidence_entropy": 0.44325387706996455,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4005991584031368,
|
|
"calibration/mean_confidence": 0.7169919419052851,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 1300.0,
|
|
"completions/max_terminated_length": 864.4,
|
|
"completions/mean_length": 121.075390625,
|
|
"completions/mean_terminated_length": 120.24693145751954,
|
|
"completions/min_length": 23.4,
|
|
"completions/min_terminated_length": 23.4,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.019067738205194473,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 84997016.0,
|
|
"reward": 0.8636497735977173,
|
|
"reward_std": 0.1769395649433136,
|
|
"rewards/accuracy_reward": 0.36455078125,
|
|
"rewards/brier_reward": 0.6381376504898071,
|
|
"rewards/confidence_uniqueness_reward": 0.8125494122505188,
|
|
"rewards/format_reward": 0.99326171875,
|
|
"rewards/frontier_aurc_reward": -0.005689960345625878,
|
|
"rewards/frontier_coverage_1": 0.07561915218830109,
|
|
"rewards/frontier_coverage_10": 0.07561915218830109,
|
|
"rewards/frontier_coverage_15": 0.07561915218830109,
|
|
"rewards/frontier_coverage_20": 0.07561915218830109,
|
|
"rewards/frontier_coverage_25": 0.07561915218830109,
|
|
"rewards/frontier_coverage_5": 0.07561915218830109,
|
|
"rewards/frontier_ece_reward": -0.036895965412259105,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.188482666015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.2342788815498352,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0942413330078125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0942413330078125,
|
|
"signal/advantage_abs_mean": 0.13878562450408935,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13878562450408935,
|
|
"signal/advantage_pre_scale_std": 0.19872219264507293,
|
|
"signal/advantage_std": 0.19872219264507293,
|
|
"signal/brier_reward/centered_abs_mean": 0.21654031574726104,
|
|
"signal/brier_reward/group_std_mean": 0.26848899722099306,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02706753946840763,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02706753946840763,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.074843430519104,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09928269833326339,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009355428814888,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009355428814888,
|
|
"signal/format_reward/centered_abs_mean": 0.012176513671875,
|
|
"signal/format_reward/group_std_mean": 0.029392263293266295,
|
|
"signal/format_reward/group_zero_std_frac": 0.85,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0060882568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0060882568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0033756108488887547,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004790552891790867,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.042343229637481e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.042343229637481e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1333490714430809,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1931760311126709,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023869482800364496,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023869482800364496,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1333490714430809,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1931760311126709,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023869482800364496,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023869482800364496,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1333490714430809,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1931760311126709,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023869482800364496,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023869482800364496,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1333490714430809,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1931760311126709,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023869482800364496,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023869482800364496,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1333490714430809,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1931760311126709,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023869482800364496,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023869482800364496,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1333490714430809,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1931760311126709,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023869482800364496,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023869482800364496,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.11479663252830505,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.13996243476867676,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.014349579066038131,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.014349579066038131,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.607214118060659,
|
|
"calibration/batch_distribution_entropy": 0.8750953970046617,
|
|
"calibration/buffer_distribution_entropy": 0.7317237070035948,
|
|
"calibration/confidence_entropy": 0.5109259837341227,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.02109375,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.30961384684766546,
|
|
"calibration/mean_confidence": 0.6211195733784108,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00068359375,
|
|
"completions/max_length": 1318.6,
|
|
"completions/max_terminated_length": 528.2,
|
|
"completions/mean_length": 122.15283203125,
|
|
"completions/mean_terminated_length": 121.18623199462891,
|
|
"completions/min_length": 33.0,
|
|
"completions/min_terminated_length": 33.0,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.002318607410416007,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 101292469.0,
|
|
"reward": 0.8805407524108887,
|
|
"reward_std": 0.15506583750247954,
|
|
"rewards/accuracy_reward": 0.3703125,
|
|
"rewards/brier_reward": 0.682115888595581,
|
|
"rewards/confidence_uniqueness_reward": 0.8357307076454162,
|
|
"rewards/format_reward": 0.99765625,
|
|
"rewards/frontier_aurc_reward": -0.005162287503480911,
|
|
"rewards/frontier_coverage_1": 0.09234738796949386,
|
|
"rewards/frontier_coverage_10": 0.09234738796949386,
|
|
"rewards/frontier_coverage_15": 0.09234738796949386,
|
|
"rewards/frontier_coverage_20": 0.09234738796949386,
|
|
"rewards/frontier_coverage_25": 0.09234738796949386,
|
|
"rewards/frontier_coverage_5": 0.09234738796949386,
|
|
"rewards/frontier_ece_reward": -0.02400104545522481,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17286376953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.2227681815624237,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.390625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.086431884765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.086431884765625,
|
|
"signal/advantage_abs_mean": 0.12113675624132156,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12113675624132156,
|
|
"signal/advantage_pre_scale_std": 0.17278285920619965,
|
|
"signal/advantage_std": 0.17278285920619965,
|
|
"signal/brier_reward/centered_abs_mean": 0.1992181122303009,
|
|
"signal/brier_reward/group_std_mean": 0.24767497479915618,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02490226402878761,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02490226402878761,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07850513458251954,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1010200709104538,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009813141822814942,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009813141822814942,
|
|
"signal/format_reward/centered_abs_mean": 0.00452880859375,
|
|
"signal/format_reward/group_std_mean": 0.012921943468973041,
|
|
"signal/format_reward/group_zero_std_frac": 0.928125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002264404296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.002264404296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002785020461305976,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0042864244896918535,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.985186315025203e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.985186315025203e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17054200172424316,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23477787971496583,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030527016613632442,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030527016613632442,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17054200172424316,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23477787971496583,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030527016613632442,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030527016613632442,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17054200172424316,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23477787971496583,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030527016613632442,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030527016613632442,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17054200172424316,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23477787971496583,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030527016613632442,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030527016613632442,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17054200172424316,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23477787971496583,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030527016613632442,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030527016613632442,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17054200172424316,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23477787971496583,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030527016613632442,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030527016613632442,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.09855036437511444,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.11995106637477874,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012318795546889305,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012318795546889305,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4567628620102736,
|
|
"calibration/batch_distribution_entropy": 0.9037929812843206,
|
|
"calibration/buffer_distribution_entropy": 0.782850349677702,
|
|
"calibration/confidence_entropy": 0.5351940352803781,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.051953125,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.158923750965474,
|
|
"calibration/mean_confidence": 0.538056644981475,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1076.8,
|
|
"completions/max_terminated_length": 350.6,
|
|
"completions/mean_length": 131.0107421875,
|
|
"completions/mean_terminated_length": 130.59954223632812,
|
|
"completions/min_length": 39.8,
|
|
"completions/min_terminated_length": 39.8,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.0020931183826178312,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 117743491.0,
|
|
"reward": 0.9162874221801758,
|
|
"reward_std": 0.13698765337467195,
|
|
"rewards/accuracy_reward": 0.41279296875,
|
|
"rewards/brier_reward": 0.7357259631156922,
|
|
"rewards/confidence_uniqueness_reward": 0.8506728529930114,
|
|
"rewards/format_reward": 0.9978515625,
|
|
"rewards/frontier_aurc_reward": -0.0044169268570840355,
|
|
"rewards/frontier_coverage_1": 0.11645138710737228,
|
|
"rewards/frontier_coverage_10": 0.11645138710737228,
|
|
"rewards/frontier_coverage_15": 0.11645138710737228,
|
|
"rewards/frontier_coverage_20": 0.11645138710737228,
|
|
"rewards/frontier_coverage_25": 0.11645138710737228,
|
|
"rewards/frontier_coverage_5": 0.11645138710737228,
|
|
"rewards/frontier_ece_reward": 0.0018998330924659967,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.166387939453125,
|
|
"signal/accuracy_reward/group_std_mean": 0.21394164264202117,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0831939697265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0831939697265625,
|
|
"signal/advantage_abs_mean": 0.10727472305297851,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10727472305297851,
|
|
"signal/advantage_pre_scale_std": 0.15276951789855958,
|
|
"signal/advantage_std": 0.15276951789855958,
|
|
"signal/brier_reward/centered_abs_mean": 0.18316833972930907,
|
|
"signal/brier_reward/group_std_mean": 0.2306816339492798,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022896042466163634,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022896042466163634,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07995835840702056,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10394333600997925,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00999479480087757,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00999479480087757,
|
|
"signal/format_reward/centered_abs_mean": 0.00416259765625,
|
|
"signal/format_reward/group_std_mean": 0.0121533976867795,
|
|
"signal/format_reward/group_zero_std_frac": 0.93125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002081298828125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.002081298828125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002050551865249872,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003289903746917844,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6704877129523086e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6704877129523086e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22418826520442964,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.28972225487232206,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0040129697881639,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0040129697881639,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22418826520442964,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.28972225487232206,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0040129697881639,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0040129697881639,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22418826520442964,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.28972225487232206,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0040129697881639,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0040129697881639,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22418826520442964,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.28972225487232206,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0040129697881639,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0040129697881639,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22418826520442964,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.28972225487232206,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0040129697881639,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0040129697881639,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22418826520442964,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.28972225487232206,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0040129697881639,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0040129697881639,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.07737888991832734,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09668841660022735,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009672361239790917,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009672361239790917,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4731364410244561,
|
|
"calibration/batch_distribution_entropy": 0.8993261190445182,
|
|
"calibration/buffer_distribution_entropy": 0.8327576947575324,
|
|
"calibration/confidence_entropy": 0.5259689998069328,
|
|
"calibration/coverage@0%": 0.005091899773608073,
|
|
"calibration/coverage@1%": 0.005091899773608073,
|
|
"calibration/coverage@10%": 0.018033076244196308,
|
|
"calibration/coverage@15%": 0.021954644871647288,
|
|
"calibration/coverage@20%": 0.03956716933348682,
|
|
"calibration/coverage@25%": 0.07555059523809524,
|
|
"calibration/coverage@30%": 0.13774573656517403,
|
|
"calibration/coverage@5%": 0.012150723303019837,
|
|
"calibration/ece": 0.12738224827100658,
|
|
"calibration/mean_confidence": 0.40403795922689695,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 847.6,
|
|
"completions/max_terminated_length": 396.2,
|
|
"completions/mean_length": 141.90908203125,
|
|
"completions/mean_terminated_length": 141.50000915527343,
|
|
"completions/min_length": 53.6,
|
|
"completions/min_terminated_length": 53.6,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.001334143104031682,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 134113312.0,
|
|
"reward": 0.9246591687202453,
|
|
"reward_std": 0.11305393874645234,
|
|
"rewards/accuracy_reward": 0.40830078125,
|
|
"rewards/brier_reward": 0.761095380783081,
|
|
"rewards/confidence_uniqueness_reward": 0.8618135690689087,
|
|
"rewards/format_reward": 0.99912109375,
|
|
"rewards/frontier_aurc_reward": -0.0039564462844282385,
|
|
"rewards/frontier_coverage_1": 0.1575838327407837,
|
|
"rewards/frontier_coverage_10": 0.1575838327407837,
|
|
"rewards/frontier_coverage_15": 0.1575838327407837,
|
|
"rewards/frontier_coverage_20": 0.1575838327407837,
|
|
"rewards/frontier_coverage_25": 0.1575838327407837,
|
|
"rewards/frontier_coverage_5": 0.1575838327407837,
|
|
"rewards/frontier_ece_reward": 0.009847322525456548,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.149713134765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.19379588663578035,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.45625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0748565673828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0748565673828125,
|
|
"signal/advantage_abs_mean": 0.08884423375129699,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08884423375129699,
|
|
"signal/advantage_pre_scale_std": 0.12785588651895524,
|
|
"signal/advantage_std": 0.12785588651895524,
|
|
"signal/brier_reward/centered_abs_mean": 0.1698179990053177,
|
|
"signal/brier_reward/group_std_mean": 0.21664086878299713,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02122724987566471,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02122724987566471,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07381970435380936,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08946335166692734,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00922746304422617,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00922746304422617,
|
|
"signal/format_reward/centered_abs_mean": 0.001702880859375,
|
|
"signal/format_reward/group_std_mean": 0.0049718443769961596,
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014080909080803395,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002259616693481803,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.520482667023316e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.520482667023316e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2586131691932678,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.32670770287513734,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004629175364971161,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004629175364971161,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2586131691932678,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.32670770287513734,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004629175364971161,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004629175364971161,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2586131691932678,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.32670770287513734,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004629175364971161,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004629175364971161,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2586131691932678,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.32670770287513734,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004629175364971161,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004629175364971161,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2586131691932678,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.32670770287513734,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004629175364971161,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004629175364971161,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2586131691932678,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.32670770287513734,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004629175364971161,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004629175364971161,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0536692775785923,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07274282872676849,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0067086596973240376,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0067086596973240376,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3318122292710428,
|
|
"calibration/batch_distribution_entropy": 0.9011091838112341,
|
|
"calibration/buffer_distribution_entropy": 0.8770609177707678,
|
|
"calibration/confidence_entropy": 0.47220691985763424,
|
|
"calibration/coverage@0%": 0.010561399217221134,
|
|
"calibration/coverage@1%": 0.010561399217221134,
|
|
"calibration/coverage@10%": 0.04851547211350293,
|
|
"calibration/coverage@15%": 0.10991163160469668,
|
|
"calibration/coverage@20%": 0.16115994985322896,
|
|
"calibration/coverage@25%": 0.2792380136986301,
|
|
"calibration/coverage@30%": 0.40509876467710376,
|
|
"calibration/coverage@5%": 0.0356103228962818,
|
|
"calibration/ece": 0.2681012210279697,
|
|
"calibration/mean_confidence": 0.34697361324306747,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1307.6,
|
|
"completions/max_terminated_length": 507.2,
|
|
"completions/mean_length": 148.73115234375,
|
|
"completions/mean_terminated_length": 148.05319519042968,
|
|
"completions/min_length": 53.8,
|
|
"completions/min_terminated_length": 53.8,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.0013268872862681746,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0014,
|
|
"num_tokens": 150586751.0,
|
|
"reward": 0.9608087778091431,
|
|
"reward_std": 0.10770493298768997,
|
|
"rewards/accuracy_reward": 0.5029296875,
|
|
"rewards/brier_reward": 0.7304662704467774,
|
|
"rewards/confidence_uniqueness_reward": 0.8599502563476562,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_aurc_reward": -0.003411487862467766,
|
|
"rewards/frontier_coverage_1": 0.08409715853631497,
|
|
"rewards/frontier_coverage_10": 0.08409715853631497,
|
|
"rewards/frontier_coverage_15": 0.08409715853631497,
|
|
"rewards/frontier_coverage_20": 0.08409715853631497,
|
|
"rewards/frontier_coverage_25": 0.08409715853631497,
|
|
"rewards/frontier_coverage_5": 0.08409715853631497,
|
|
"rewards/frontier_ece_reward": 0.016473467275500298,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1483642578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.19843538403511046,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.41875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07418212890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07418212890625,
|
|
"signal/advantage_abs_mean": 0.08254445642232895,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08254445642232895,
|
|
"signal/advantage_pre_scale_std": 0.11901939809322357,
|
|
"signal/advantage_std": 0.11901939809322357,
|
|
"signal/brier_reward/centered_abs_mean": 0.1825536698102951,
|
|
"signal/brier_reward/group_std_mean": 0.22881582379341125,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02281920872628689,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02281920872628689,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07770240604877472,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09924467504024506,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00971280075609684,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00971280075609684,
|
|
"signal/format_reward/centered_abs_mean": 0.00189208984375,
|
|
"signal/format_reward/group_std_mean": 0.005524271540343762,
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013431656872853637,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0021427671890705824,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4042664517764933e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4042664517764933e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.28589142560958863,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.356065034866333,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005117456335574389,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005117456335574389,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.28589142560958863,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.356065034866333,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005117456335574389,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005117456335574389,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.28589142560958863,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.356065034866333,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005117456335574389,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005117456335574389,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.28589142560958863,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.356065034866333,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005117456335574389,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005117456335574389,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.28589142560958863,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.356065034866333,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005117456335574389,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005117456335574389,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.28589142560958863,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.356065034866333,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005117456335574389,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005117456335574389,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04525191038846969,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06264355853199959,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005656488798558712,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005656488798558712,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.38946164222395224,
|
|
"calibration/batch_distribution_entropy": 0.8997395386342186,
|
|
"calibration/buffer_distribution_entropy": 0.9080234560993914,
|
|
"calibration/confidence_entropy": 0.45810094180544186,
|
|
"calibration/coverage@0%": 0.0023483365949119373,
|
|
"calibration/coverage@1%": 0.0023483365949119373,
|
|
"calibration/coverage@10%": 0.008610567514677103,
|
|
"calibration/coverage@15%": 0.012133072407045009,
|
|
"calibration/coverage@20%": 0.07978391442193315,
|
|
"calibration/coverage@25%": 0.13847784105080388,
|
|
"calibration/coverage@30%": 0.24525552130098616,
|
|
"calibration/coverage@5%": 0.0023483365949119373,
|
|
"calibration/ece": 0.17463787616505108,
|
|
"calibration/mean_confidence": 0.33009617318790924,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1233.4,
|
|
"completions/max_terminated_length": 568.2,
|
|
"completions/mean_length": 154.51337890625,
|
|
"completions/mean_terminated_length": 153.9742462158203,
|
|
"completions/min_length": 64.8,
|
|
"completions/min_terminated_length": 64.8,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.0014597562840208411,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 167189896.0,
|
|
"reward": 0.9457063794136047,
|
|
"reward_std": 0.10457922667264938,
|
|
"rewards/accuracy_reward": 0.45458984375,
|
|
"rewards/brier_reward": 0.7449754357337952,
|
|
"rewards/confidence_uniqueness_reward": 0.8708751559257507,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.003416177164763212,
|
|
"rewards/frontier_coverage_1": 0.1375892072916031,
|
|
"rewards/frontier_coverage_10": 0.1375892072916031,
|
|
"rewards/frontier_coverage_15": 0.1375892072916031,
|
|
"rewards/frontier_coverage_20": 0.1375892072916031,
|
|
"rewards/frontier_coverage_25": 0.1375892072916031,
|
|
"rewards/frontier_coverage_5": 0.1375892072916031,
|
|
"rewards/frontier_ece_reward": 0.0168386897072196,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.139410400390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.18234478533267975,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.490625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0697052001953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0697052001953125,
|
|
"signal/advantage_abs_mean": 0.08101131021976471,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08101131021976471,
|
|
"signal/advantage_pre_scale_std": 0.11804848611354828,
|
|
"signal/advantage_std": 0.11804848611354828,
|
|
"signal/brier_reward/centered_abs_mean": 0.18030621111392975,
|
|
"signal/brier_reward/group_std_mean": 0.22819359302520753,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02253827638924122,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02253827638924122,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06919719129800797,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08767969161272049,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008649648912250996,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008649648912250996,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_std_mean": 0.004419417306780815,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001562736975029111,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002508711442351341,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7972989846603015e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7972989846603015e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2776048481464386,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.34865164160728457,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004969126544892788,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004969126544892788,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2776048481464386,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.34865164160728457,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004969126544892788,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004969126544892788,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2776048481464386,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.34865164160728457,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004969126544892788,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004969126544892788,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2776048481464386,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.34865164160728457,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004969126544892788,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004969126544892788,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2776048481464386,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.34865164160728457,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004969126544892788,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004969126544892788,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2776048481464386,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.34865164160728457,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004969126544892788,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004969126544892788,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.046651491522789,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06448797807097435,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005831436440348625,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005831436440348625,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"eval_calibration/aurc": 0.6191944124446507,
|
|
"eval_calibration/batch_distribution_entropy": 0.909546140975906,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9216257279279545,
|
|
"eval_calibration/confidence_entropy": 0.43642560073917536,
|
|
"eval_calibration/coverage@0%": 0.0078125,
|
|
"eval_calibration/coverage@1%": 0.0078125,
|
|
"eval_calibration/coverage@10%": 0.0078125,
|
|
"eval_calibration/coverage@15%": 0.0078125,
|
|
"eval_calibration/coverage@20%": 0.0078125,
|
|
"eval_calibration/coverage@25%": 0.0078125,
|
|
"eval_calibration/coverage@30%": 0.0078125,
|
|
"eval_calibration/coverage@5%": 0.0078125,
|
|
"eval_calibration/ece": 0.2444125504032258,
|
|
"eval_calibration/mean_confidence": 0.3953807963709677,
|
|
"eval_completions/clipped_ratio": 0.002155172413793094,
|
|
"eval_completions/max_length": 594.25,
|
|
"eval_completions/max_terminated_length": 312.0,
|
|
"eval_completions/mean_length": 162.88725662231445,
|
|
"eval_completions/mean_terminated_length": 159.92407989501953,
|
|
"eval_completions/min_length": 74.5,
|
|
"eval_completions/min_terminated_length": 74.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 167189896.0,
|
|
"eval_reward": 0.9074415266513824,
|
|
"eval_reward_std": 0.20979441329836845,
|
|
"eval_rewards/accuracy_reward": 0.369140625,
|
|
"eval_rewards/brier_reward": 0.7619887739419937,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.846884474158287,
|
|
"eval_rewards/format_reward": 0.99609375,
|
|
"eval_rewards/frontier_aurc_reward": -0.0038871413562446833,
|
|
"eval_rewards/frontier_coverage_1": 0.20465973764657974,
|
|
"eval_rewards/frontier_coverage_10": 0.20465973764657974,
|
|
"eval_rewards/frontier_coverage_15": 0.20465973764657974,
|
|
"eval_rewards/frontier_coverage_20": 0.20465973764657974,
|
|
"eval_rewards/frontier_coverage_25": 0.20465973764657974,
|
|
"eval_rewards/frontier_coverage_5": 0.20465973764657974,
|
|
"eval_rewards/frontier_ece_reward": 0.014434305019676685,
|
|
"eval_runtime": 26.2542,
|
|
"eval_samples_per_second": 19.045,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4490966796875,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4801955074071884,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22454833984375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22454833984375,
|
|
"eval_signal/advantage_abs_mean": 0.18047761172056198,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.18047761172056198,
|
|
"eval_signal/advantage_pre_scale_std": 0.20848042145371437,
|
|
"eval_signal/advantage_std": 0.20848042145371437,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.22210051491856575,
|
|
"eval_signal/brier_reward/group_std_mean": 0.27025653421878815,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02776256436482072,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02776256436482072,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07186052948236465,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09253636561334133,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008982566185295582,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008982566185295582,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.007568359375,
|
|
"eval_signal/format_reward/group_std_mean": 0.022097086533904076,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.875,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0037841796875,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0037841796875,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002644163556396961,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004254971107002348,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7330525376310106e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7330525376310106e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.42421814799308777,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.5166834145784378,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.007593504618853331,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.007593504618853331,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.42421814799308777,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.5166834145784378,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.007593504618853331,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.007593504618853331,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.42421814799308777,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.5166834145784378,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007593504618853331,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.007593504618853331,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.42421814799308777,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.5166834145784378,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.007593504618853331,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.007593504618853331,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.42421814799308777,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.5166834145784378,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.007593504618853331,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.007593504618853331,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.42421814799308777,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.5166834145784378,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.007593504618853331,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.007593504618853331,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.05587606783956289,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.08752950467169285,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006984508479945362,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006984508479945362,
|
|
"eval_steps_per_second": 0.152,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.40226900078093014,
|
|
"calibration/batch_distribution_entropy": 0.9552943382208323,
|
|
"calibration/buffer_distribution_entropy": 0.9272863852228177,
|
|
"calibration/confidence_entropy": 0.4636731554061452,
|
|
"calibration/coverage@0%": 0.00234375,
|
|
"calibration/coverage@1%": 0.00234375,
|
|
"calibration/coverage@10%": 0.00234375,
|
|
"calibration/coverage@15%": 0.003125,
|
|
"calibration/coverage@20%": 0.015234375,
|
|
"calibration/coverage@25%": 0.069921875,
|
|
"calibration/coverage@30%": 0.260546875,
|
|
"calibration/coverage@5%": 0.00234375,
|
|
"calibration/ece": 0.2050855796754508,
|
|
"calibration/mean_confidence": 0.4194716920787247,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 669.2,
|
|
"completions/max_terminated_length": 446.2,
|
|
"completions/mean_length": 159.76171875,
|
|
"completions/mean_terminated_length": 159.6274383544922,
|
|
"completions/min_length": 69.8,
|
|
"completions/min_terminated_length": 69.8,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.0011799404164776206,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 184062976.0,
|
|
"reward": 0.9487934350967407,
|
|
"reward_std": 0.10535514205694199,
|
|
"rewards/accuracy_reward": 0.451953125,
|
|
"rewards/brier_reward": 0.7491334080696106,
|
|
"rewards/confidence_uniqueness_reward": 0.8902879357337952,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.003473851131275296,
|
|
"rewards/frontier_coverage_1": 0.1475163072347641,
|
|
"rewards/frontier_coverage_10": 0.1475163072347641,
|
|
"rewards/frontier_coverage_15": 0.1475163072347641,
|
|
"rewards/frontier_coverage_20": 0.1475163072347641,
|
|
"rewards/frontier_coverage_25": 0.1475163072347641,
|
|
"rewards/frontier_coverage_5": 0.1475163072347641,
|
|
"rewards/frontier_ece_reward": 0.018036763183772564,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14241943359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.18591534495353698,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.48125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.071209716796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.071209716796875,
|
|
"signal/advantage_abs_mean": 0.0813161700963974,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0813161700963974,
|
|
"signal/advantage_pre_scale_std": 0.11816369593143464,
|
|
"signal/advantage_std": 0.11816369593143464,
|
|
"signal/brier_reward/centered_abs_mean": 0.18312384486198424,
|
|
"signal/brier_reward/group_std_mean": 0.2311247318983078,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02289048060774803,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02289048060774803,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05273754522204399,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0655559055507183,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006592193152755499,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006592193152755499,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020946973469108342,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0033506324514746668,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.749508032342419e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.749508032342419e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.27024593353271487,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3390821158885956,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004837402049452067,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004837402049452067,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.27024593353271487,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3390821158885956,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004837402049452067,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004837402049452067,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.27024593353271487,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3390821158885956,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004837402049452067,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004837402049452067,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.27024593353271487,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3390821158885956,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004837402049452067,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004837402049452067,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.27024593353271487,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3390821158885956,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004837402049452067,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004837402049452067,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.27024593353271487,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3390821158885956,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004837402049452067,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004837402049452067,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05012344047427177,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06734204888343812,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006265430059283972,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006265430059283972,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34397713585565626,
|
|
"calibration/batch_distribution_entropy": 0.9688834897018976,
|
|
"calibration/buffer_distribution_entropy": 0.9367661297882097,
|
|
"calibration/confidence_entropy": 0.45534865455503254,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.00625,
|
|
"calibration/coverage@15%": 0.037890625,
|
|
"calibration/coverage@20%": 0.123828125,
|
|
"calibration/coverage@25%": 0.208984375,
|
|
"calibration/coverage@30%": 0.38515625,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.12179229650095984,
|
|
"calibration/mean_confidence": 0.47636667340644767,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 658.8,
|
|
"completions/max_terminated_length": 420.6,
|
|
"completions/mean_length": 163.17666015625,
|
|
"completions/mean_terminated_length": 163.04219665527344,
|
|
"completions/min_length": 72.8,
|
|
"completions/min_terminated_length": 72.8,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.0012748718727380037,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 200548721.0,
|
|
"reward": 0.9616720676422119,
|
|
"reward_std": 0.11071353554725646,
|
|
"rewards/accuracy_reward": 0.4712890625,
|
|
"rewards/brier_reward": 0.7608612418174744,
|
|
"rewards/confidence_uniqueness_reward": 0.9034651756286621,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.003351506870239973,
|
|
"rewards/frontier_coverage_1": 0.13997417837381362,
|
|
"rewards/frontier_coverage_10": 0.13997417837381362,
|
|
"rewards/frontier_coverage_15": 0.13997417837381362,
|
|
"rewards/frontier_coverage_20": 0.13997417837381362,
|
|
"rewards/frontier_coverage_25": 0.13997417837381362,
|
|
"rewards/frontier_coverage_5": 0.13997417837381362,
|
|
"rewards/frontier_ece_reward": 0.025279919058084487,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.13929443359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.18363580107688904,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.475,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.069647216796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.069647216796875,
|
|
"signal/advantage_abs_mean": 0.08572653234004975,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08572653234004975,
|
|
"signal/advantage_pre_scale_std": 0.1261049687862396,
|
|
"signal/advantage_std": 0.1261049687862396,
|
|
"signal/brier_reward/centered_abs_mean": 0.18710338175296784,
|
|
"signal/brier_reward/group_std_mean": 0.23497919142246246,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02338792271912098,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02338792271912098,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04473799243569374,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05513100624084473,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005592249054461718,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005592249054461718,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025330715347081424,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038898529950529338,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.534197796601802e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.534197796601802e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.25085292756557465,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.318107670545578,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0044902671128511425,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0044902671128511425,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.25085292756557465,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.318107670545578,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0044902671128511425,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0044902671128511425,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.25085292756557465,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.318107670545578,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0044902671128511425,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0044902671128511425,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.25085292756557465,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.318107670545578,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0044902671128511425,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0044902671128511425,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.25085292756557465,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.318107670545578,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0044902671128511425,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0044902671128511425,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.25085292756557465,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.318107670545578,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0044902671128511425,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0044902671128511425,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.057060886174440384,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0741629496216774,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007132610771805048,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007132610771805048,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3186970268959458,
|
|
"calibration/batch_distribution_entropy": 0.9612344061340921,
|
|
"calibration/buffer_distribution_entropy": 0.942231301780966,
|
|
"calibration/confidence_entropy": 0.44323483168206046,
|
|
"calibration/coverage@0%": 0.003515625,
|
|
"calibration/coverage@1%": 0.003515625,
|
|
"calibration/coverage@10%": 0.07578125,
|
|
"calibration/coverage@15%": 0.178125,
|
|
"calibration/coverage@20%": 0.316015625,
|
|
"calibration/coverage@25%": 0.45234375,
|
|
"calibration/coverage@30%": 0.526171875,
|
|
"calibration/coverage@5%": 0.003515625,
|
|
"calibration/ece": 0.16880598958333337,
|
|
"calibration/mean_confidence": 0.5302666666666667,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 635.8,
|
|
"completions/max_terminated_length": 635.8,
|
|
"completions/mean_length": 167.496875,
|
|
"completions/mean_terminated_length": 167.496875,
|
|
"completions/min_length": 72.0,
|
|
"completions/min_terminated_length": 72.0,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.0012696352787315845,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 217296113.0,
|
|
"reward": 0.9835787534713745,
|
|
"reward_std": 0.115215602517128,
|
|
"rewards/accuracy_reward": 0.51943359375,
|
|
"rewards/brier_reward": 0.7641871452331543,
|
|
"rewards/confidence_uniqueness_reward": 0.9048604130744934,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002962963841855526,
|
|
"rewards/frontier_coverage_1": 0.10569853037595749,
|
|
"rewards/frontier_coverage_10": 0.10569853037595749,
|
|
"rewards/frontier_coverage_15": 0.10569853037595749,
|
|
"rewards/frontier_coverage_20": 0.10569853037595749,
|
|
"rewards/frontier_coverage_25": 0.10569853037595749,
|
|
"rewards/frontier_coverage_5": 0.10569853037595749,
|
|
"rewards/frontier_ece_reward": 0.032237300649285316,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.139007568359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.18477267920970916,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.475,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0695037841796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0695037841796875,
|
|
"signal/advantage_abs_mean": 0.08863357156515121,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08863357156515121,
|
|
"signal/advantage_pre_scale_std": 0.13259580731391907,
|
|
"signal/advantage_std": 0.13259580731391907,
|
|
"signal/brier_reward/centered_abs_mean": 0.18662929832935332,
|
|
"signal/brier_reward/group_std_mean": 0.23361400365829468,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023328662291169165,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.023328662291169165,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05014804154634476,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.060537828505039214,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006268505193293095,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006268505193293095,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027079980354756117,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004054524842649699,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8473164497409016e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8473164497409016e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22929660975933075,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.29457331299781797,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004104409227147698,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004104409227147698,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22929660975933075,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.29457331299781797,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004104409227147698,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004104409227147698,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22929660975933075,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29457331299781797,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004104409227147698,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004104409227147698,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22929660975933075,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.29457331299781797,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004104409227147698,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004104409227147698,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22929660975933075,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.29457331299781797,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004104409227147698,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004104409227147698,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22929660975933075,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.29457331299781797,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004104409227147698,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004104409227147698,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05887450873851776,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07482730895280838,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00735931359231472,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00735931359231472,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3285903790301356,
|
|
"calibration/batch_distribution_entropy": 0.9653098937115295,
|
|
"calibration/buffer_distribution_entropy": 0.9474725315085728,
|
|
"calibration/confidence_entropy": 0.4189063430289507,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.08909313725490195,
|
|
"calibration/coverage@15%": 0.17904105392156863,
|
|
"calibration/coverage@20%": 0.27637254901960784,
|
|
"calibration/coverage@25%": 0.4222089460784314,
|
|
"calibration/coverage@30%": 0.5321078431372549,
|
|
"calibration/coverage@5%": 0.01604626225490196,
|
|
"calibration/ece": 0.16827429222736107,
|
|
"calibration/mean_confidence": 0.4845619210079331,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1140.4,
|
|
"completions/max_terminated_length": 697.4,
|
|
"completions/mean_length": 167.4361328125,
|
|
"completions/mean_terminated_length": 166.9017761230469,
|
|
"completions/min_length": 64.4,
|
|
"completions/min_terminated_length": 64.4,
|
|
"epoch": 0.224,
|
|
"grad_norm": 0.0010618689702823758,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0011,
|
|
"num_tokens": 234163843.0,
|
|
"reward": 0.9629539489746094,
|
|
"reward_std": 0.11185683757066726,
|
|
"rewards/accuracy_reward": 0.468359375,
|
|
"rewards/brier_reward": 0.7660558819770813,
|
|
"rewards/confidence_uniqueness_reward": 0.9080687403678894,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0033203907776623966,
|
|
"rewards/frontier_coverage_1": 0.1524452567100525,
|
|
"rewards/frontier_coverage_10": 0.1524452567100525,
|
|
"rewards/frontier_coverage_15": 0.1524452567100525,
|
|
"rewards/frontier_coverage_20": 0.1524452567100525,
|
|
"rewards/frontier_coverage_25": 0.1524452567100525,
|
|
"rewards/frontier_coverage_5": 0.1524452567100525,
|
|
"rewards/frontier_ece_reward": 0.027517157793045043,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1264404296875,
|
|
"signal/accuracy_reward/group_std_mean": 0.17330618500709533,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.478125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06322021484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06322021484375,
|
|
"signal/advantage_abs_mean": 0.08456142097711564,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08456142097711564,
|
|
"signal/advantage_pre_scale_std": 0.12961900383234023,
|
|
"signal/advantage_std": 0.12961900383234023,
|
|
"signal/brier_reward/centered_abs_mean": 0.1864775687456131,
|
|
"signal/brier_reward/group_std_mean": 0.23539321422576903,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02330969609320164,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02330969609320164,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.052434886991977694,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06445125937461853,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006554360873997212,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006554360873997212,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030171813908964396,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004486602451652289,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4007543803891166e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4007543803891166e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2229921907186508,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.28949338793754575,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0039915600791573524,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0039915600791573524,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2229921907186508,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.28949338793754575,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0039915600791573524,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0039915600791573524,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2229921907186508,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.28949338793754575,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0039915600791573524,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0039915600791573524,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2229921907186508,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.28949338793754575,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0039915600791573524,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0039915600791573524,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2229921907186508,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.28949338793754575,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0039915600791573524,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0039915600791573524,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2229921907186508,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.28949338793754575,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0039915600791573524,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0039915600791573524,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05467732772231102,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06959621906280518,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006834665965288878,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006834665965288878,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.38145099671484345,
|
|
"calibration/batch_distribution_entropy": 0.9351019623512027,
|
|
"calibration/buffer_distribution_entropy": 0.9511017219264826,
|
|
"calibration/confidence_entropy": 0.41021463710127815,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.1296875,
|
|
"calibration/coverage@15%": 0.169140625,
|
|
"calibration/coverage@20%": 0.197265625,
|
|
"calibration/coverage@25%": 0.23046875,
|
|
"calibration/coverage@30%": 0.278125,
|
|
"calibration/coverage@5%": 0.0703125,
|
|
"calibration/ece": 0.18824602337447136,
|
|
"calibration/mean_confidence": 0.5374034910250385,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1337.2,
|
|
"completions/max_terminated_length": 494.0,
|
|
"completions/mean_length": 170.96103515625,
|
|
"completions/mean_terminated_length": 170.29381103515624,
|
|
"completions/min_length": 67.2,
|
|
"completions/min_terminated_length": 67.2,
|
|
"epoch": 0.24,
|
|
"grad_norm": 0.0012356507359072566,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0012,
|
|
"num_tokens": 251166164.0,
|
|
"reward": 0.9860649704933167,
|
|
"reward_std": 0.12122494280338288,
|
|
"rewards/accuracy_reward": 0.52705078125,
|
|
"rewards/brier_reward": 0.7560766577720642,
|
|
"rewards/confidence_uniqueness_reward": 0.9106106519699096,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.003134048730134964,
|
|
"rewards/frontier_coverage_1": 0.101060039550066,
|
|
"rewards/frontier_coverage_10": 0.101060039550066,
|
|
"rewards/frontier_coverage_15": 0.101060039550066,
|
|
"rewards/frontier_coverage_20": 0.101060039550066,
|
|
"rewards/frontier_coverage_25": 0.101060039550066,
|
|
"rewards/frontier_coverage_5": 0.101060039550066,
|
|
"rewards/frontier_ece_reward": 0.029981668666005136,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.150408935546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1955954134464264,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.453125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0752044677734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0752044677734375,
|
|
"signal/advantage_abs_mean": 0.09415247589349747,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09415247589349747,
|
|
"signal/advantage_pre_scale_std": 0.14144977927207947,
|
|
"signal/advantage_std": 0.14144977927207947,
|
|
"signal/brier_reward/centered_abs_mean": 0.19640157520771026,
|
|
"signal/brier_reward/group_std_mean": 0.24560691714286803,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024550196900963783,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.024550196900963783,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05692050457000732,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06995929852128029,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007115063071250915,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007115063071250915,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_std_mean": 0.0038669900968670845,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031411519274115564,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004663504846394062,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.622661701636389e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.622661701636389e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22368650436401366,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.29344227313995364,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004003988299518824,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004003988299518824,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22368650436401366,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.29344227313995364,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004003988299518824,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004003988299518824,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22368650436401366,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29344227313995364,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004003988299518824,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004003988299518824,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22368650436401366,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.29344227313995364,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004003988299518824,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004003988299518824,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22368650436401366,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.29344227313995364,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004003988299518824,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004003988299518824,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22368650436401366,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.29344227313995364,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004003988299518824,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004003988299518824,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.055852291733026506,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07003621906042098,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006981536466628313,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006981536466628313,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28470953702454416,
|
|
"calibration/batch_distribution_entropy": 0.9208122623181048,
|
|
"calibration/buffer_distribution_entropy": 0.9522540519535683,
|
|
"calibration/confidence_entropy": 0.3878620407373435,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.1923449730919765,
|
|
"calibration/coverage@15%": 0.30060695939334636,
|
|
"calibration/coverage@20%": 0.39478733488258316,
|
|
"calibration/coverage@25%": 0.5057401235322896,
|
|
"calibration/coverage@30%": 0.6214056384540118,
|
|
"calibration/coverage@5%": 0.009375,
|
|
"calibration/ece": 0.13667208102921785,
|
|
"calibration/mean_confidence": 0.5453152864845467,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 933.6,
|
|
"completions/max_terminated_length": 493.0,
|
|
"completions/mean_length": 170.7779296875,
|
|
"completions/mean_terminated_length": 170.2452423095703,
|
|
"completions/min_length": 74.0,
|
|
"completions/min_terminated_length": 74.0,
|
|
"epoch": 0.256,
|
|
"grad_norm": 0.0012276864144951105,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0016,
|
|
"num_tokens": 267969746.0,
|
|
"reward": 0.9812070965766907,
|
|
"reward_std": 0.1114748939871788,
|
|
"rewards/accuracy_reward": 0.5060546875,
|
|
"rewards/brier_reward": 0.7687627673149109,
|
|
"rewards/confidence_uniqueness_reward": 0.9120404958724976,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.003094889922067523,
|
|
"rewards/frontier_coverage_1": 0.1360874891281128,
|
|
"rewards/frontier_coverage_10": 0.1360874891281128,
|
|
"rewards/frontier_coverage_15": 0.1360874891281128,
|
|
"rewards/frontier_coverage_20": 0.1360874891281128,
|
|
"rewards/frontier_coverage_25": 0.1360874891281128,
|
|
"rewards/frontier_coverage_5": 0.1360874891281128,
|
|
"rewards/frontier_ece_reward": 0.030495237931609152,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.13546142578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1735977828502655,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.521875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.067730712890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.067730712890625,
|
|
"signal/advantage_abs_mean": 0.08631241321563721,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08631241321563721,
|
|
"signal/advantage_pre_scale_std": 0.1342590034008026,
|
|
"signal/advantage_std": 0.1342590034008026,
|
|
"signal/brier_reward/centered_abs_mean": 0.18553363680839538,
|
|
"signal/brier_reward/group_std_mean": 0.2332346946001053,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023191704601049423,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.023191704601049423,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05944240242242813,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07294412925839425,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0074303003028035166,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0074303003028035166,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_std_mean": 0.0033145629335194827,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003175769792869687,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004814452119171619,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.6846276856958865e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.6846276856958865e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2149658679962158,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27974134087562563,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003847888810560107,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003847888810560107,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2149658679962158,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27974134087562563,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003847888810560107,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003847888810560107,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2149658679962158,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27974134087562563,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003847888810560107,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003847888810560107,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2149658679962158,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27974134087562563,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003847888810560107,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003847888810560107,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2149658679962158,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27974134087562563,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003847888810560107,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003847888810560107,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2149658679962158,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27974134087562563,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003847888810560107,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003847888810560107,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04882029145956039,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06196781545877457,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006102536432445049,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006102536432445049,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3789066982157474,
|
|
"calibration/batch_distribution_entropy": 0.946901981250825,
|
|
"calibration/buffer_distribution_entropy": 0.9537924269959485,
|
|
"calibration/confidence_entropy": 0.4054690863640535,
|
|
"calibration/coverage@0%": 0.007421875,
|
|
"calibration/coverage@1%": 0.007421875,
|
|
"calibration/coverage@10%": 0.102734375,
|
|
"calibration/coverage@15%": 0.149609375,
|
|
"calibration/coverage@20%": 0.204296875,
|
|
"calibration/coverage@25%": 0.254296875,
|
|
"calibration/coverage@30%": 0.32421875,
|
|
"calibration/coverage@5%": 0.039453125,
|
|
"calibration/ece": 0.16809869559712115,
|
|
"calibration/mean_confidence": 0.5157878198455872,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 513.2,
|
|
"completions/max_terminated_length": 513.2,
|
|
"completions/mean_length": 179.4818359375,
|
|
"completions/mean_terminated_length": 179.4818359375,
|
|
"completions/min_length": 74.2,
|
|
"completions/min_terminated_length": 74.2,
|
|
"epoch": 0.272,
|
|
"grad_norm": 0.001049001351930201,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0002,
|
|
"num_tokens": 284773336.0,
|
|
"reward": 0.9761500954627991,
|
|
"reward_std": 0.105889230966568,
|
|
"rewards/accuracy_reward": 0.4970703125,
|
|
"rewards/brier_reward": 0.7569293856620789,
|
|
"rewards/confidence_uniqueness_reward": 0.9251906633377075,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0031034779269248245,
|
|
"rewards/frontier_coverage_1": 0.13408799916505815,
|
|
"rewards/frontier_coverage_10": 0.13408799916505815,
|
|
"rewards/frontier_coverage_15": 0.13408799916505815,
|
|
"rewards/frontier_coverage_20": 0.13408799916505815,
|
|
"rewards/frontier_coverage_25": 0.13408799916505815,
|
|
"rewards/frontier_coverage_5": 0.13408799916505815,
|
|
"rewards/frontier_ece_reward": 0.024426154047250747,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12740478515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.16262791752815248,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.553125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.063702392578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.063702392578125,
|
|
"signal/advantage_abs_mean": 0.08320691287517548,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08320691287517548,
|
|
"signal/advantage_pre_scale_std": 0.12677900344133378,
|
|
"signal/advantage_std": 0.12677900344133378,
|
|
"signal/brier_reward/centered_abs_mean": 0.1897197872400284,
|
|
"signal/brier_reward/group_std_mean": 0.23877674639225005,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02371497340500355,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02371497340500355,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04770283699035645,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.058389055728912356,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005962854623794556,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005962854623794556,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002825378580018878,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004264938598498702,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.0574273336678745e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.0574273336678745e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22668876647949218,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.29389293789863585,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004057728871703148,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004057728871703148,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22668876647949218,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.29389293789863585,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004057728871703148,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004057728871703148,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22668876647949218,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29389293789863585,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004057728871703148,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004057728871703148,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22668876647949218,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.29389293789863585,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004057728871703148,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004057728871703148,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22668876647949218,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.29389293789863585,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004057728871703148,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004057728871703148,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22668876647949218,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.29389293789863585,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004057728871703148,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004057728871703148,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04503390789031982,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05716151520609856,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005629238486289978,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005629238486289978,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3297805723752279,
|
|
"calibration/batch_distribution_entropy": 0.9416902309925609,
|
|
"calibration/buffer_distribution_entropy": 0.9557326159162518,
|
|
"calibration/confidence_entropy": 0.4033264689268282,
|
|
"calibration/coverage@0%": 0.006653620352250489,
|
|
"calibration/coverage@1%": 0.006653620352250489,
|
|
"calibration/coverage@10%": 0.006653620352250489,
|
|
"calibration/coverage@15%": 0.08688845401174168,
|
|
"calibration/coverage@20%": 0.2244228534735812,
|
|
"calibration/coverage@25%": 0.28773391634050877,
|
|
"calibration/coverage@30%": 0.37174581090998043,
|
|
"calibration/coverage@5%": 0.006653620352250489,
|
|
"calibration/ece": 0.1285960999935037,
|
|
"calibration/mean_confidence": 0.5267547320265696,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 991.8,
|
|
"completions/max_terminated_length": 555.6,
|
|
"completions/mean_length": 178.51474609375,
|
|
"completions/mean_terminated_length": 178.11636047363282,
|
|
"completions/min_length": 77.4,
|
|
"completions/min_terminated_length": 77.4,
|
|
"epoch": 0.288,
|
|
"grad_norm": 0.001152192009612918,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0011,
|
|
"num_tokens": 301559503.0,
|
|
"reward": 0.9819895029067993,
|
|
"reward_std": 0.10433640331029892,
|
|
"rewards/accuracy_reward": 0.50400390625,
|
|
"rewards/brier_reward": 0.7620156645774842,
|
|
"rewards/confidence_uniqueness_reward": 0.9369817614555359,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.002946482878178358,
|
|
"rewards/frontier_coverage_1": 0.13725561499595643,
|
|
"rewards/frontier_coverage_10": 0.13725561499595643,
|
|
"rewards/frontier_coverage_15": 0.13725561499595643,
|
|
"rewards/frontier_coverage_20": 0.13725561499595643,
|
|
"rewards/frontier_coverage_25": 0.13725561499595643,
|
|
"rewards/frontier_coverage_5": 0.13725561499595643,
|
|
"rewards/frontier_ece_reward": 0.024566837400197983,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.129986572265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.17214542329311372,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.50625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0649932861328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0649932861328125,
|
|
"signal/advantage_abs_mean": 0.07953081279993057,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07953081279993057,
|
|
"signal/advantage_pre_scale_std": 0.12358220815658569,
|
|
"signal/advantage_std": 0.12358220815658569,
|
|
"signal/brier_reward/centered_abs_mean": 0.1853651374578476,
|
|
"signal/brier_reward/group_std_mean": 0.23431913554668427,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02317064218223095,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02317064218223095,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.037301665544509886,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04643301069736481,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004662708193063736,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004662708193063736,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025432564318180082,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003974350774660707,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5524286542786284e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5524286542786284e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.232001656293869,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2984708070755005,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004152829479426146,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004152829479426146,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.232001656293869,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2984708070755005,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004152829479426146,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004152829479426146,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.232001656293869,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2984708070755005,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004152829479426146,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004152829479426146,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.232001656293869,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2984708070755005,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004152829479426146,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004152829479426146,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.232001656293869,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2984708070755005,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004152829479426146,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004152829479426146,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.232001656293869,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2984708070755005,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004152829479426146,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004152829479426146,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04061479941010475,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.051888493448495866,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005076849926263094,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005076849926263094,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29826600631964345,
|
|
"calibration/batch_distribution_entropy": 0.9458930250074676,
|
|
"calibration/buffer_distribution_entropy": 0.9569944190219012,
|
|
"calibration/confidence_entropy": 0.41124185655389256,
|
|
"calibration/coverage@0%": 0.00859375,
|
|
"calibration/coverage@1%": 0.00859375,
|
|
"calibration/coverage@10%": 0.07421875,
|
|
"calibration/coverage@15%": 0.15625,
|
|
"calibration/coverage@20%": 0.2375,
|
|
"calibration/coverage@25%": 0.36328125,
|
|
"calibration/coverage@30%": 0.48671875,
|
|
"calibration/coverage@5%": 0.025390625,
|
|
"calibration/ece": 0.1401060364598731,
|
|
"calibration/mean_confidence": 0.5169087501931073,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 765.4,
|
|
"completions/max_terminated_length": 564.4,
|
|
"completions/mean_length": 184.74921875,
|
|
"completions/mean_terminated_length": 184.61671752929686,
|
|
"completions/min_length": 77.2,
|
|
"completions/min_terminated_length": 77.2,
|
|
"epoch": 0.304,
|
|
"grad_norm": 0.0009637068142183125,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 318381287.0,
|
|
"reward": 0.9803183794021606,
|
|
"reward_std": 0.09998638182878494,
|
|
"rewards/accuracy_reward": 0.50380859375,
|
|
"rewards/brier_reward": 0.7553321838378906,
|
|
"rewards/confidence_uniqueness_reward": 0.9388602018356323,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0029232859145849944,
|
|
"rewards/frontier_coverage_1": 0.13151973783969878,
|
|
"rewards/frontier_coverage_10": 0.13151973783969878,
|
|
"rewards/frontier_coverage_15": 0.13151973783969878,
|
|
"rewards/frontier_coverage_20": 0.13151973783969878,
|
|
"rewards/frontier_coverage_25": 0.13151973783969878,
|
|
"rewards/frontier_coverage_5": 0.13151973783969878,
|
|
"rewards/frontier_ece_reward": 0.020927964150905608,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.127679443359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.17264682054519653,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0638397216796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0638397216796875,
|
|
"signal/advantage_abs_mean": 0.07606031596660615,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07606031596660615,
|
|
"signal/advantage_pre_scale_std": 0.11618665158748627,
|
|
"signal/advantage_std": 0.11618665158748627,
|
|
"signal/brier_reward/centered_abs_mean": 0.1843687653541565,
|
|
"signal/brier_reward/group_std_mean": 0.23427461981773376,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023046095669269562,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.023046095669269562,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03518189340829849,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04454438164830208,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004397736676037311,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004397736676037311,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023705217288807036,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0036838185507804154,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.243233852321282e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.243233852321282e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2418591320514679,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.31356959939002993,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004329278413206339,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004329278413206339,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2418591320514679,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.31356959939002993,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004329278413206339,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004329278413206339,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2418591320514679,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.31356959939002993,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004329278413206339,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004329278413206339,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2418591320514679,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.31356959939002993,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004329278413206339,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004329278413206339,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2418591320514679,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.31356959939002993,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004329278413206339,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004329278413206339,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2418591320514679,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.31356959939002993,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004329278413206339,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004329278413206339,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03723434209823608,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04741183742880821,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00465429276227951,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00465429276227951,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2544567539913821,
|
|
"calibration/batch_distribution_entropy": 0.9350166127572491,
|
|
"calibration/buffer_distribution_entropy": 0.958189052231246,
|
|
"calibration/confidence_entropy": 0.39907110397213336,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.2701145119863014,
|
|
"calibration/coverage@15%": 0.37330219545009785,
|
|
"calibration/coverage@20%": 0.45343153742661446,
|
|
"calibration/coverage@25%": 0.509323018590998,
|
|
"calibration/coverage@30%": 0.5761764615949119,
|
|
"calibration/coverage@5%": 0.11763851516634052,
|
|
"calibration/ece": 0.14744758144939113,
|
|
"calibration/mean_confidence": 0.5285503908524433,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 1123.4,
|
|
"completions/max_terminated_length": 705.4,
|
|
"completions/mean_length": 187.6494140625,
|
|
"completions/mean_terminated_length": 186.8601806640625,
|
|
"completions/min_length": 81.0,
|
|
"completions/min_terminated_length": 81.0,
|
|
"epoch": 0.32,
|
|
"grad_norm": 0.0008596840780228376,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0019,
|
|
"num_tokens": 335391521.0,
|
|
"reward": 0.9921112775802612,
|
|
"reward_std": 0.09160036891698838,
|
|
"rewards/accuracy_reward": 0.51875,
|
|
"rewards/brier_reward": 0.7743848919868469,
|
|
"rewards/confidence_uniqueness_reward": 0.9427272796630859,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.0026898517040535807,
|
|
"rewards/frontier_coverage_1": 0.1416488030925393,
|
|
"rewards/frontier_coverage_10": 0.1416488030925393,
|
|
"rewards/frontier_coverage_15": 0.1416488030925393,
|
|
"rewards/frontier_coverage_20": 0.1416488030925393,
|
|
"rewards/frontier_coverage_25": 0.1416488030925393,
|
|
"rewards/frontier_coverage_5": 0.1416488030925393,
|
|
"rewards/frontier_ece_reward": 0.026192883402109145,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0979736328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1366899386048317,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04898681640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04898681640625,
|
|
"signal/advantage_abs_mean": 0.06886067688465118,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06886067688465118,
|
|
"signal/advantage_pre_scale_std": 0.11252744793891907,
|
|
"signal/advantage_std": 0.11252744793891907,
|
|
"signal/brier_reward/centered_abs_mean": 0.16976939141750336,
|
|
"signal/brier_reward/group_std_mean": 0.21671704649925233,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02122117392718792,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02122117392718792,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03321526050567627,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04269362464547157,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0041519075632095335,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0041519075632095335,
|
|
"signal/format_reward/centered_abs_mean": 0.001300048828125,
|
|
"signal/format_reward/group_std_mean": 0.0031943732872605326,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006500244140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006500244140625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025342844892293215,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038999815471470354,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.536369087873027e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.536369087873027e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20525244176387786,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2671793639659882,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036740186624228954,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036740186624228954,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20525244176387786,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2671793639659882,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036740186624228954,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036740186624228954,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20525244176387786,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2671793639659882,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036740186624228954,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036740186624228954,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20525244176387786,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2671793639659882,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036740186624228954,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036740186624228954,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20525244176387786,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2671793639659882,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036740186624228954,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036740186624228954,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20525244176387786,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2671793639659882,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036740186624228954,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036740186624228954,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.037545930594205856,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04693235754966736,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004693241324275732,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004693241324275732,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"eval_calibration/aurc": 0.4974161459569359,
|
|
"eval_calibration/batch_distribution_entropy": 0.8600728019210881,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9585032256106852,
|
|
"eval_calibration/confidence_entropy": 0.39179126853215673,
|
|
"eval_calibration/coverage@0%": 0.0625,
|
|
"eval_calibration/coverage@1%": 0.0625,
|
|
"eval_calibration/coverage@10%": 0.0625,
|
|
"eval_calibration/coverage@15%": 0.078125,
|
|
"eval_calibration/coverage@20%": 0.140625,
|
|
"eval_calibration/coverage@25%": 0.1796875,
|
|
"eval_calibration/coverage@30%": 0.265625,
|
|
"eval_calibration/coverage@5%": 0.0625,
|
|
"eval_calibration/ece": 0.20821445704912134,
|
|
"eval_calibration/mean_confidence": 0.45275429295087866,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 348.75,
|
|
"eval_completions/max_terminated_length": 348.75,
|
|
"eval_completions/mean_length": 189.11557006835938,
|
|
"eval_completions/mean_terminated_length": 189.11557006835938,
|
|
"eval_completions/min_length": 98.75,
|
|
"eval_completions/min_terminated_length": 98.75,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 335391521.0,
|
|
"eval_reward": 0.9335773140192032,
|
|
"eval_reward_std": 0.22667960077524185,
|
|
"eval_rewards/accuracy_reward": 0.40234375,
|
|
"eval_rewards/brier_reward": 0.7676407843828201,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.89111328125,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0035294744884595275,
|
|
"eval_rewards/frontier_coverage_1": 0.21452518552541733,
|
|
"eval_rewards/frontier_coverage_10": 0.21452518552541733,
|
|
"eval_rewards/frontier_coverage_15": 0.21452518552541733,
|
|
"eval_rewards/frontier_coverage_20": 0.21452518552541733,
|
|
"eval_rewards/frontier_coverage_25": 0.21452518552541733,
|
|
"eval_rewards/frontier_coverage_5": 0.21452518552541733,
|
|
"eval_rewards/frontier_ece_reward": 0.01667470997199416,
|
|
"eval_runtime": 18.5202,
|
|
"eval_samples_per_second": 26.998,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.468994140625,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49156785011291504,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2344970703125,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2344970703125,
|
|
"eval_signal/advantage_abs_mean": 0.20321417972445488,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20321417972445488,
|
|
"eval_signal/advantage_pre_scale_std": 0.22419220209121704,
|
|
"eval_signal/advantage_std": 0.22419220209121704,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.24414894357323647,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2980574741959572,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030518617946654558,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.030518617946654558,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0486907958984375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05951074603945017,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0060863494873046875,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0060863494873046875,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0035710909869521856,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005775783443823457,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.392252726072911e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.392252726072911e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.4047232046723366,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4975067600607872,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.007244544918648899,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.007244544918648899,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.4047232046723366,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4975067600607872,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.007244544918648899,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.007244544918648899,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.4047232046723366,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4975067600607872,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007244544918648899,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.007244544918648899,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.4047232046723366,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.4975067600607872,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.007244544918648899,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.007244544918648899,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.4047232046723366,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.4975067600607872,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.007244544918648899,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.007244544918648899,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.4047232046723366,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4975067600607872,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.007244544918648899,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.007244544918648899,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.04583758395165205,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.06471480429172516,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005729697993956506,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005729697993956506,
|
|
"eval_steps_per_second": 0.216,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2958493535528611,
|
|
"calibration/batch_distribution_entropy": 0.9413453167838378,
|
|
"calibration/buffer_distribution_entropy": 0.9608605051679421,
|
|
"calibration/confidence_entropy": 0.4114441599205099,
|
|
"calibration/coverage@0%": 0.011328125,
|
|
"calibration/coverage@1%": 0.011328125,
|
|
"calibration/coverage@10%": 0.06491484222113503,
|
|
"calibration/coverage@15%": 0.13764829990215263,
|
|
"calibration/coverage@20%": 0.22517505503913893,
|
|
"calibration/coverage@25%": 0.3994274400684931,
|
|
"calibration/coverage@30%": 0.5850224743150685,
|
|
"calibration/coverage@5%": 0.011328125,
|
|
"calibration/ece": 0.1434243952954204,
|
|
"calibration/mean_confidence": 0.49109226134555123,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1100.6,
|
|
"completions/max_terminated_length": 520.0,
|
|
"completions/mean_length": 190.394921875,
|
|
"completions/mean_terminated_length": 189.8685272216797,
|
|
"completions/min_length": 82.8,
|
|
"completions/min_terminated_length": 82.8,
|
|
"epoch": 0.336,
|
|
"grad_norm": 0.0011786021059378982,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0017,
|
|
"num_tokens": 352063597.0,
|
|
"reward": 0.9939133524894714,
|
|
"reward_std": 0.09802540838718414,
|
|
"rewards/accuracy_reward": 0.526171875,
|
|
"rewards/brier_reward": 0.7703649401664734,
|
|
"rewards/confidence_uniqueness_reward": 0.9453340649604798,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.002737466990947723,
|
|
"rewards/frontier_coverage_1": 0.1277718111872673,
|
|
"rewards/frontier_coverage_10": 0.1277718111872673,
|
|
"rewards/frontier_coverage_15": 0.1277718111872673,
|
|
"rewards/frontier_coverage_20": 0.1277718111872673,
|
|
"rewards/frontier_coverage_25": 0.1277718111872673,
|
|
"rewards/frontier_coverage_5": 0.1277718111872673,
|
|
"rewards/frontier_ece_reward": 0.02348385229706764,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.11649169921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1568697527050972,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.54375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.058245849609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.058245849609375,
|
|
"signal/advantage_abs_mean": 0.07433497905731201,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07433497905731201,
|
|
"signal/advantage_pre_scale_std": 0.11875344961881637,
|
|
"signal/advantage_std": 0.11875344961881637,
|
|
"signal/brier_reward/centered_abs_mean": 0.17236365377902985,
|
|
"signal/brier_reward/group_std_mean": 0.21735928058624268,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02154545672237873,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02154545672237873,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030726969614624976,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03968273177742958,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003840871201828122,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003840871201828122,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025001152884215117,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00392577862367034,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4752060784958306e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4752060784958306e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21679833829402922,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2767444133758545,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003880690271034837,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003880690271034837,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21679833829402922,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2767444133758545,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003880690271034837,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003880690271034837,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21679833829402922,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2767444133758545,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003880690271034837,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003880690271034837,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21679833829402922,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2767444133758545,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003880690271034837,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003880690271034837,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21679833829402922,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2767444133758545,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003880690271034837,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003880690271034837,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21679833829402922,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2767444133758545,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003880690271034837,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003880690271034837,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03399265930056572,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.043000844120979306,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004249082412570715,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004249082412570715,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.33002620817581574,
|
|
"calibration/batch_distribution_entropy": 0.8972123286813798,
|
|
"calibration/buffer_distribution_entropy": 0.9671293966456596,
|
|
"calibration/confidence_entropy": 0.37335880071175015,
|
|
"calibration/coverage@0%": 0.026593077299412914,
|
|
"calibration/coverage@1%": 0.026593077299412914,
|
|
"calibration/coverage@10%": 0.14979283879647748,
|
|
"calibration/coverage@15%": 0.26584056996086103,
|
|
"calibration/coverage@20%": 0.3733717588062623,
|
|
"calibration/coverage@25%": 0.42732387475538164,
|
|
"calibration/coverage@30%": 0.49458628913894326,
|
|
"calibration/coverage@5%": 0.047296202299412914,
|
|
"calibration/ece": 0.14116460423344823,
|
|
"calibration/mean_confidence": 0.43884960142619966,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1208.4,
|
|
"completions/max_terminated_length": 605.8,
|
|
"completions/mean_length": 191.46328125,
|
|
"completions/mean_terminated_length": 190.8067199707031,
|
|
"completions/min_length": 84.4,
|
|
"completions/min_terminated_length": 84.4,
|
|
"epoch": 0.352,
|
|
"grad_norm": 0.0026657464914023876,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0015,
|
|
"num_tokens": 369284597.0,
|
|
"reward": 0.9673644065856933,
|
|
"reward_std": 0.09604953676462173,
|
|
"rewards/accuracy_reward": 0.464453125,
|
|
"rewards/brier_reward": 0.7727057933807373,
|
|
"rewards/confidence_uniqueness_reward": 0.9422548532485961,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.0032384898513555527,
|
|
"rewards/frontier_coverage_1": 0.17688873410224915,
|
|
"rewards/frontier_coverage_10": 0.17688873410224915,
|
|
"rewards/frontier_coverage_15": 0.17688873410224915,
|
|
"rewards/frontier_coverage_20": 0.17688873410224915,
|
|
"rewards/frontier_coverage_25": 0.17688873410224915,
|
|
"rewards/frontier_coverage_5": 0.17688873410224915,
|
|
"rewards/frontier_ece_reward": 0.017748223431408406,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.11265869140625,
|
|
"signal/accuracy_reward/group_std_mean": 0.15127451121807098,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.55625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.056329345703125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.056329345703125,
|
|
"signal/advantage_abs_mean": 0.07229470312595368,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07229470312595368,
|
|
"signal/advantage_pre_scale_std": 0.11684101819992065,
|
|
"signal/advantage_std": 0.11684101819992065,
|
|
"signal/brier_reward/centered_abs_mean": 0.16957641541957855,
|
|
"signal/brier_reward/group_std_mean": 0.2178028106689453,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02119705192744732,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02119705192744732,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03406139500439167,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.044897759705781935,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004257674375548959,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004257674375548959,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_std_mean": 0.004419417260214687,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028988351114094257,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004550885502249002,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.188914583413862e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.188914583413862e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21616642773151398,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27745549082756044,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003869378939270973,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003869378939270973,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21616642773151398,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27745549082756044,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003869378939270973,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003869378939270973,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21616642773151398,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27745549082756044,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003869378939270973,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003869378939270973,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21616642773151398,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27745549082756044,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003869378939270973,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003869378939270973,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21616642773151398,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27745549082756044,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003869378939270973,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003869378939270973,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21616642773151398,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27745549082756044,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003869378939270973,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003869378939270973,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.028543695434927942,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03598736748099327,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035679619293659927,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035679619293659927,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34684431490162665,
|
|
"calibration/batch_distribution_entropy": 0.9249764330331744,
|
|
"calibration/buffer_distribution_entropy": 0.9721049923704769,
|
|
"calibration/confidence_entropy": 0.3966501657494727,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.048828125,
|
|
"calibration/coverage@15%": 0.083984375,
|
|
"calibration/coverage@20%": 0.240234375,
|
|
"calibration/coverage@25%": 0.347265625,
|
|
"calibration/coverage@30%": 0.48951122186888457,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.14233482628214694,
|
|
"calibration/mean_confidence": 0.5210622953382034,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 855.8,
|
|
"completions/max_terminated_length": 648.0,
|
|
"completions/mean_length": 191.449609375,
|
|
"completions/mean_terminated_length": 191.1877471923828,
|
|
"completions/min_length": 83.2,
|
|
"completions/min_terminated_length": 83.2,
|
|
"epoch": 0.368,
|
|
"grad_norm": 0.0011664318153634667,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 386310513.0,
|
|
"reward": 0.980566680431366,
|
|
"reward_std": 0.0891783744096756,
|
|
"rewards/accuracy_reward": 0.49208984375,
|
|
"rewards/brier_reward": 0.7773085832595825,
|
|
"rewards/confidence_uniqueness_reward": 0.9470725178718566,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0032397733069956304,
|
|
"rewards/frontier_coverage_1": 0.15876154750585555,
|
|
"rewards/frontier_coverage_10": 0.15876154750585555,
|
|
"rewards/frontier_coverage_15": 0.15876154750585555,
|
|
"rewards/frontier_coverage_20": 0.15876154750585555,
|
|
"rewards/frontier_coverage_25": 0.15876154750585555,
|
|
"rewards/frontier_coverage_5": 0.15876154750585555,
|
|
"rewards/frontier_ece_reward": 0.016630425490438937,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.102740478515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.13781636953353882,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0513702392578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0513702392578125,
|
|
"signal/advantage_abs_mean": 0.06816617250442505,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06816617250442505,
|
|
"signal/advantage_pre_scale_std": 0.11163422465324402,
|
|
"signal/advantage_std": 0.11163422465324402,
|
|
"signal/brier_reward/centered_abs_mean": 0.16006246507167815,
|
|
"signal/brier_reward/group_std_mean": 0.20494329929351807,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02000780813395977,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02000780813395977,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028726159036159514,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03694523498415947,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035907698795199392,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035907698795199392,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028500501066446304,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004479775950312614,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1015896315220746e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1015896315220746e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19571449756622314,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2562991797924042,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035032893996685744,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035032893996685744,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19571449756622314,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2562991797924042,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035032893996685744,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035032893996685744,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19571449756622314,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2562991797924042,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035032893996685744,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035032893996685744,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19571449756622314,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2562991797924042,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035032893996685744,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035032893996685744,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19571449756622314,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2562991797924042,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035032893996685744,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035032893996685744,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19571449756622314,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2562991797924042,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035032893996685744,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035032893996685744,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.023004084080457687,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.028804820030927658,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002875510510057211,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002875510510057211,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.33882551551583673,
|
|
"calibration/batch_distribution_entropy": 0.9004682009504199,
|
|
"calibration/buffer_distribution_entropy": 0.9747984378991879,
|
|
"calibration/confidence_entropy": 0.3752828914999894,
|
|
"calibration/coverage@0%": 0.03828125,
|
|
"calibration/coverage@1%": 0.03828125,
|
|
"calibration/coverage@10%": 0.166796875,
|
|
"calibration/coverage@15%": 0.23203125,
|
|
"calibration/coverage@20%": 0.294921875,
|
|
"calibration/coverage@25%": 0.3328125,
|
|
"calibration/coverage@30%": 0.390234375,
|
|
"calibration/coverage@5%": 0.063671875,
|
|
"calibration/ece": 0.14781379384913512,
|
|
"calibration/mean_confidence": 0.46431405798825054,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1023.8,
|
|
"completions/max_terminated_length": 643.2,
|
|
"completions/mean_length": 190.45791015625,
|
|
"completions/mean_terminated_length": 189.801416015625,
|
|
"completions/min_length": 84.2,
|
|
"completions/min_terminated_length": 84.2,
|
|
"epoch": 0.384,
|
|
"grad_norm": 0.0009418850531801581,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 403117314.0,
|
|
"reward": 0.9945974946022034,
|
|
"reward_std": 0.08855005800724029,
|
|
"rewards/accuracy_reward": 0.52099609375,
|
|
"rewards/brier_reward": 0.7850892663002014,
|
|
"rewards/confidence_uniqueness_reward": 0.9485597133636474,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0032369979191571472,
|
|
"rewards/frontier_coverage_1": 0.1466797597706318,
|
|
"rewards/frontier_coverage_10": 0.1466797597706318,
|
|
"rewards/frontier_coverage_15": 0.1466797597706318,
|
|
"rewards/frontier_coverage_20": 0.1466797597706318,
|
|
"rewards/frontier_coverage_25": 0.1466797597706318,
|
|
"rewards/frontier_coverage_5": 0.1466797597706318,
|
|
"rewards/frontier_ece_reward": 0.01553578432649374,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.106524658203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.14518197476863862,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.565625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0532623291015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0532623291015625,
|
|
"signal/advantage_abs_mean": 0.06687061563134193,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06687061563134193,
|
|
"signal/advantage_pre_scale_std": 0.11217147409915924,
|
|
"signal/advantage_std": 0.11217147409915924,
|
|
"signal/brier_reward/centered_abs_mean": 0.1526328980922699,
|
|
"signal/brier_reward/group_std_mean": 0.19572176933288574,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019079112261533738,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019079112261533738,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027018361538648606,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03468450009822845,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033772951923310758,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033772951923310758,
|
|
"signal/format_reward/centered_abs_mean": 0.000872802734375,
|
|
"signal/format_reward/group_std_mean": 0.0016024607699364423,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004364013671875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004364013671875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003183392807841301,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004988422710448503,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.6982728710863737e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.6982728710863737e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18951932489871978,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24623486995697022,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033923957496881487,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033923957496881487,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18951932489871978,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24623486995697022,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033923957496881487,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033923957496881487,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18951932489871978,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24623486995697022,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033923957496881487,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033923957496881487,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18951932489871978,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24623486995697022,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033923957496881487,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033923957496881487,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18951932489871978,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24623486995697022,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033923957496881487,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033923957496881487,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18951932489871978,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24623486995697022,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033923957496881487,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033923957496881487,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.019004416465759278,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02362184412777424,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023755520582199098,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023755520582199098,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.43027029729660676,
|
|
"calibration/batch_distribution_entropy": 0.9443037742377609,
|
|
"calibration/buffer_distribution_entropy": 0.9749766621972892,
|
|
"calibration/confidence_entropy": 0.420648326243084,
|
|
"calibration/coverage@0%": 0.000392156862745098,
|
|
"calibration/coverage@1%": 0.000392156862745098,
|
|
"calibration/coverage@10%": 0.000392156862745098,
|
|
"calibration/coverage@15%": 0.003126531862745098,
|
|
"calibration/coverage@20%": 0.009376531862745098,
|
|
"calibration/coverage@25%": 0.02657014732262768,
|
|
"calibration/coverage@30%": 0.17616181446893825,
|
|
"calibration/coverage@5%": 0.000392156862745098,
|
|
"calibration/ece": 0.19883478146558858,
|
|
"calibration/mean_confidence": 0.5291464915604904,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1158.6,
|
|
"completions/max_terminated_length": 692.4,
|
|
"completions/mean_length": 190.79619140625,
|
|
"completions/mean_terminated_length": 190.40262451171876,
|
|
"completions/min_length": 91.0,
|
|
"completions/min_terminated_length": 91.0,
|
|
"epoch": 0.4,
|
|
"grad_norm": 0.001098749809898436,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 420107515.0,
|
|
"reward": 0.9793721914291382,
|
|
"reward_std": 0.09981218427419662,
|
|
"rewards/accuracy_reward": 0.49814453125,
|
|
"rewards/brier_reward": 0.7662390470504761,
|
|
"rewards/confidence_uniqueness_reward": 0.9503534436225891,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.004101655632257462,
|
|
"rewards/frontier_coverage_1": 0.13619700074195862,
|
|
"rewards/frontier_coverage_10": 0.13619700074195862,
|
|
"rewards/frontier_coverage_15": 0.13619700074195862,
|
|
"rewards/frontier_coverage_20": 0.13619700074195862,
|
|
"rewards/frontier_coverage_25": 0.1293622836470604,
|
|
"rewards/frontier_coverage_5": 0.13619700074195862,
|
|
"rewards/frontier_ece_reward": 0.01191479042172432,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.123504638671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1655549794435501,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.521875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0617523193359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0617523193359375,
|
|
"signal/advantage_abs_mean": 0.07545655816793442,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07545655816793442,
|
|
"signal/advantage_pre_scale_std": 0.12451154887676238,
|
|
"signal/advantage_std": 0.12451154887676238,
|
|
"signal/brier_reward/centered_abs_mean": 0.16211409568786622,
|
|
"signal/brier_reward/group_std_mean": 0.20733815133571626,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020264261960983278,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020264261960983278,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02588532753288746,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.034060098230838776,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032356659416109324,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032356659416109324,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004065265553072095,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.006544529832899571,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.276825199369341e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.276825199369341e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1825489729642868,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2414218693971634,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032676266506314277,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032676266506314277,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1825489729642868,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2414218693971634,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032676266506314277,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032676266506314277,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1825489729642868,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2414218693971634,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032676266506314277,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032676266506314277,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1825489729642868,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2414218693971634,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032676266506314277,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032676266506314277,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17083930373191833,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.226739364862442,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030580234713852407,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030580234713852407,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1825489729642868,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2414218693971634,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032676266506314277,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032676266506314277,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.017164209112524986,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02124990485608578,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002145526139065623,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002145526139065623,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3307413637301698,
|
|
"calibration/batch_distribution_entropy": 0.9539248294554381,
|
|
"calibration/buffer_distribution_entropy": 0.974026721883375,
|
|
"calibration/confidence_entropy": 0.445160747267321,
|
|
"calibration/coverage@0%": 0.00078125,
|
|
"calibration/coverage@1%": 0.00078125,
|
|
"calibration/coverage@10%": 0.00078125,
|
|
"calibration/coverage@15%": 0.0332665728962818,
|
|
"calibration/coverage@20%": 0.14390670865949118,
|
|
"calibration/coverage@25%": 0.2673984833659491,
|
|
"calibration/coverage@30%": 0.40337114726027395,
|
|
"calibration/coverage@5%": 0.00078125,
|
|
"calibration/ece": 0.12327517119786222,
|
|
"calibration/mean_confidence": 0.54675080181613,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 957.0,
|
|
"completions/max_terminated_length": 581.0,
|
|
"completions/mean_length": 193.95068359375,
|
|
"completions/mean_terminated_length": 193.68844604492188,
|
|
"completions/min_length": 90.6,
|
|
"completions/min_terminated_length": 90.6,
|
|
"epoch": 0.416,
|
|
"grad_norm": 0.001056396751664579,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0012,
|
|
"num_tokens": 436974754.0,
|
|
"reward": 0.9854490041732789,
|
|
"reward_std": 0.09411467611789703,
|
|
"rewards/accuracy_reward": 0.5017578125,
|
|
"rewards/brier_reward": 0.7856782793998718,
|
|
"rewards/confidence_uniqueness_reward": 0.9530214190483093,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0033007480669766665,
|
|
"rewards/frontier_coverage_1": 0.1507933869957924,
|
|
"rewards/frontier_coverage_10": 0.1507933869957924,
|
|
"rewards/frontier_coverage_15": 0.1507933869957924,
|
|
"rewards/frontier_coverage_20": 0.1507933869957924,
|
|
"rewards/frontier_coverage_25": 0.14029909968376159,
|
|
"rewards/frontier_coverage_5": 0.1507933869957924,
|
|
"rewards/frontier_ece_reward": 0.01144680418074131,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12384033203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.15653499066829682,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.578125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.061920166015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.061920166015625,
|
|
"signal/advantage_abs_mean": 0.07388749271631241,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07388749271631241,
|
|
"signal/advantage_pre_scale_std": 0.12038870304822921,
|
|
"signal/advantage_std": 0.12038870304822921,
|
|
"signal/brier_reward/centered_abs_mean": 0.15659076273441314,
|
|
"signal/brier_reward/group_std_mean": 0.19731901586055756,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019573845341801642,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019573845341801642,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023388362675905227,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.031004397571086882,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029235453344881534,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029235453344881534,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003098398260772228,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00491497041657567,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.546132597373798e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.546132597373798e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19432482421398162,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24642258882522583,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034784142393618823,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034784142393618823,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19432482421398162,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24642258882522583,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034784142393618823,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034784142393618823,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19432482421398162,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24642258882522583,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034784142393618823,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034784142393618823,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19432482421398162,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24642258882522583,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034784142393618823,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034784142393618823,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17647163271903993,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22427623569965363,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031588422134518623,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031588422134518623,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19432482421398162,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24642258882522583,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034784142393618823,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034784142393618823,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014592299051582814,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.018116169050335883,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018240373814478517,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018240373814478517,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2824695747893299,
|
|
"calibration/batch_distribution_entropy": 0.9428787987315607,
|
|
"calibration/buffer_distribution_entropy": 0.9719925690177561,
|
|
"calibration/confidence_entropy": 0.41514944308510227,
|
|
"calibration/coverage@0%": 0.020703125,
|
|
"calibration/coverage@1%": 0.020703125,
|
|
"calibration/coverage@10%": 0.147265625,
|
|
"calibration/coverage@15%": 0.242578125,
|
|
"calibration/coverage@20%": 0.3546875,
|
|
"calibration/coverage@25%": 0.405859375,
|
|
"calibration/coverage@30%": 0.46328125,
|
|
"calibration/coverage@5%": 0.028125,
|
|
"calibration/ece": 0.11745400373456068,
|
|
"calibration/mean_confidence": 0.5382869135028088,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 703.4,
|
|
"completions/max_terminated_length": 544.8,
|
|
"completions/mean_length": 194.23974609375,
|
|
"completions/mean_terminated_length": 194.10873718261718,
|
|
"completions/min_length": 92.4,
|
|
"completions/min_terminated_length": 92.4,
|
|
"epoch": 0.432,
|
|
"grad_norm": 0.0009216758189722896,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 453978105.0,
|
|
"reward": 1.0048931002616883,
|
|
"reward_std": 0.08277135789394378,
|
|
"rewards/accuracy_reward": 0.53837890625,
|
|
"rewards/brier_reward": 0.8011577129364014,
|
|
"rewards/confidence_uniqueness_reward": 0.9518381714820862,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0027131067123264073,
|
|
"rewards/frontier_coverage_1": 0.1438295803964138,
|
|
"rewards/frontier_coverage_10": 0.1438295803964138,
|
|
"rewards/frontier_coverage_15": 0.1438295803964138,
|
|
"rewards/frontier_coverage_20": 0.1438295803964138,
|
|
"rewards/frontier_coverage_25": 0.1322506435215473,
|
|
"rewards/frontier_coverage_5": 0.1438295803964138,
|
|
"rewards/frontier_ece_reward": 0.01188302058726549,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.114117431640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.14671022891998292,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0570587158203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0570587158203125,
|
|
"signal/advantage_abs_mean": 0.06361640393733978,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06361640393733978,
|
|
"signal/advantage_pre_scale_std": 0.10817221403121949,
|
|
"signal/advantage_std": 0.10817221403121949,
|
|
"signal/brier_reward/centered_abs_mean": 0.13704997897148133,
|
|
"signal/brier_reward/group_std_mean": 0.17600221931934357,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017131247371435166,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017131247371435166,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023459725454449652,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03010900169610977,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029324656818062065,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029324656818062065,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002545620733872056,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004147910000756383,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.556660787784495e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.556660787784495e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1864376574754715,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23926096856594087,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003337233932688832,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003337233932688832,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1864376574754715,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23926096856594087,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003337233932688832,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003337233932688832,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1864376574754715,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23926096856594087,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003337233932688832,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003337233932688832,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1864376574754715,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23926096856594087,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003337233932688832,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003337233932688832,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16645156741142272,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.21410418748855592,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029794828966259955,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029794828966259955,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1864376574754715,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23926096856594087,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003337233932688832,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003337233932688832,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01259520035237074,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.015745421312749384,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015744000440463424,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015744000440463424,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29260948300243605,
|
|
"calibration/batch_distribution_entropy": 0.9606903533224823,
|
|
"calibration/buffer_distribution_entropy": 0.9695915320747955,
|
|
"calibration/confidence_entropy": 0.4582926324034869,
|
|
"calibration/coverage@0%": 0.02734375,
|
|
"calibration/coverage@1%": 0.02734375,
|
|
"calibration/coverage@10%": 0.063671875,
|
|
"calibration/coverage@15%": 0.116015625,
|
|
"calibration/coverage@20%": 0.23125,
|
|
"calibration/coverage@25%": 0.373046875,
|
|
"calibration/coverage@30%": 0.53828125,
|
|
"calibration/coverage@5%": 0.036328125,
|
|
"calibration/ece": 0.14489365937185286,
|
|
"calibration/mean_confidence": 0.5384731583322886,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 584.8,
|
|
"completions/max_terminated_length": 584.8,
|
|
"completions/mean_length": 201.206640625,
|
|
"completions/mean_terminated_length": 201.206640625,
|
|
"completions/min_length": 78.6,
|
|
"completions/min_terminated_length": 78.6,
|
|
"epoch": 0.448,
|
|
"grad_norm": 0.0008310034754686058,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 470991261.0,
|
|
"reward": 0.9908460736274719,
|
|
"reward_std": 0.08178583383560181,
|
|
"rewards/accuracy_reward": 0.5119140625,
|
|
"rewards/brier_reward": 0.7910648584365845,
|
|
"rewards/confidence_uniqueness_reward": 0.952523159980774,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.002787482738494873,
|
|
"rewards/frontier_coverage_1": 0.15150942653417587,
|
|
"rewards/frontier_coverage_10": 0.15150942653417587,
|
|
"rewards/frontier_coverage_15": 0.15150942653417587,
|
|
"rewards/frontier_coverage_20": 0.15150942653417587,
|
|
"rewards/frontier_coverage_25": 0.13818347454071045,
|
|
"rewards/frontier_coverage_5": 0.15150942653417587,
|
|
"rewards/frontier_ece_reward": 0.00960810985416174,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.108056640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.14055088460445403,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.603125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0540283203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0540283203125,
|
|
"signal/advantage_abs_mean": 0.0633295938372612,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0633295938372612,
|
|
"signal/advantage_pre_scale_std": 0.1058726117014885,
|
|
"signal/advantage_std": 0.1058726117014885,
|
|
"signal/brier_reward/centered_abs_mean": 0.14405288696289062,
|
|
"signal/brier_reward/group_std_mean": 0.18299511671066285,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018006610870361327,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018006610870361327,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022896628081798553,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.029164545238018036,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002862078510224819,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002862078510224819,
|
|
"signal/format_reward/centered_abs_mean": 0.000823974609375,
|
|
"signal/format_reward/group_std_mean": 0.0011528188362717629,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004119873046875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004119873046875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002245605061762035,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00356750157661736,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0196329064201566e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0196329064201566e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1972437471151352,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2506330370903015,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003530662879347801,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003530662879347801,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1972437471151352,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2506330370903015,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003530662879347801,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003530662879347801,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1972437471151352,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2506330370903015,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003530662879347801,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003530662879347801,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1972437471151352,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2506330370903015,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003530662879347801,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003530662879347801,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1650971680879593,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.21136927902698516,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029552392661571503,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029552392661571503,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1972437471151352,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2506330370903015,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003530662879347801,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003530662879347801,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.012216190062463283,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.015183654241263866,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015270237578079104,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015270237578079104,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4077113827188322,
|
|
"calibration/batch_distribution_entropy": 0.9615084032212978,
|
|
"calibration/buffer_distribution_entropy": 0.9685401991489083,
|
|
"calibration/confidence_entropy": 0.45527402259160316,
|
|
"calibration/coverage@0%": 0.003515625,
|
|
"calibration/coverage@1%": 0.003515625,
|
|
"calibration/coverage@10%": 0.009765625,
|
|
"calibration/coverage@15%": 0.04296875,
|
|
"calibration/coverage@20%": 0.071484375,
|
|
"calibration/coverage@25%": 0.191796875,
|
|
"calibration/coverage@30%": 0.28515625,
|
|
"calibration/coverage@5%": 0.003515625,
|
|
"calibration/ece": 0.1355396044993245,
|
|
"calibration/mean_confidence": 0.45190300263963845,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 666.2,
|
|
"completions/max_terminated_length": 450.4,
|
|
"completions/mean_length": 208.301953125,
|
|
"completions/mean_terminated_length": 208.17261352539063,
|
|
"completions/min_length": 96.6,
|
|
"completions/min_terminated_length": 96.6,
|
|
"epoch": 0.464,
|
|
"grad_norm": 0.0008374059689231217,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0,
|
|
"num_tokens": 488295089.0,
|
|
"reward": 0.9605985045433044,
|
|
"reward_std": 0.07935848534107208,
|
|
"rewards/accuracy_reward": 0.45380859375,
|
|
"rewards/brier_reward": 0.7694530367851258,
|
|
"rewards/confidence_uniqueness_reward": 0.9470274209976196,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.00324577521532774,
|
|
"rewards/frontier_coverage_1": 0.17469169050455094,
|
|
"rewards/frontier_coverage_10": 0.17469169050455094,
|
|
"rewards/frontier_coverage_15": 0.17469169050455094,
|
|
"rewards/frontier_coverage_20": 0.17469169050455094,
|
|
"rewards/frontier_coverage_25": 0.1511695146560669,
|
|
"rewards/frontier_coverage_5": 0.17469169050455094,
|
|
"rewards/frontier_ece_reward": 0.007592650689184665,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.096746826171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.130436909198761,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0483734130859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0483734130859375,
|
|
"signal/advantage_abs_mean": 0.06013599261641502,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06013599261641502,
|
|
"signal/advantage_pre_scale_std": 0.10274101942777633,
|
|
"signal/advantage_std": 0.10274101942777633,
|
|
"signal/brier_reward/centered_abs_mean": 0.1392355114221573,
|
|
"signal/brier_reward/group_std_mean": 0.17765427827835084,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01740443892776966,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01740443892776966,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025603653863072395,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03302198946475983,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032004567328840494,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032004567328840494,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022237420780584216,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003647429635748267,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.980498222517781e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.980498222517781e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1835351675748825,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23672112226486205,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003285279218107462,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003285279218107462,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1835351675748825,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23672112226486205,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003285279218107462,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003285279218107462,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1835351675748825,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23672112226486205,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003285279218107462,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003285279218107462,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1835351675748825,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23672112226486205,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003285279218107462,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003285279218107462,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.15318235754966736,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1978419154882431,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002741964068263769,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002741964068263769,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1835351675748825,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23672112226486205,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003285279218107462,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003285279218107462,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010807633772492409,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.013707993924617768,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013509542215615511,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013509542215615511,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30240157627696823,
|
|
"calibration/batch_distribution_entropy": 0.9399514031439473,
|
|
"calibration/buffer_distribution_entropy": 0.9665852737948889,
|
|
"calibration/confidence_entropy": 0.422597238136302,
|
|
"calibration/coverage@0%": 0.00625,
|
|
"calibration/coverage@1%": 0.00625,
|
|
"calibration/coverage@10%": 0.0546875,
|
|
"calibration/coverage@15%": 0.19296875,
|
|
"calibration/coverage@20%": 0.2640625,
|
|
"calibration/coverage@25%": 0.416796875,
|
|
"calibration/coverage@30%": 0.47890625,
|
|
"calibration/coverage@5%": 0.017578125,
|
|
"calibration/ece": 0.16375171296404,
|
|
"calibration/mean_confidence": 0.46227563078596,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 959.2,
|
|
"completions/max_terminated_length": 560.2,
|
|
"completions/mean_length": 210.96083984375,
|
|
"completions/mean_terminated_length": 210.70225830078124,
|
|
"completions/min_length": 100.8,
|
|
"completions/min_terminated_length": 100.8,
|
|
"epoch": 0.48,
|
|
"grad_norm": 0.000989911612123251,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 505503360.0,
|
|
"reward": 0.987113094329834,
|
|
"reward_std": 0.08181465268135071,
|
|
"rewards/accuracy_reward": 0.511328125,
|
|
"rewards/brier_reward": 0.7769276857376098,
|
|
"rewards/confidence_uniqueness_reward": 0.9411561250686645,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0026606434723362325,
|
|
"rewards/frontier_coverage_1": 0.14955383241176606,
|
|
"rewards/frontier_coverage_10": 0.14955383241176606,
|
|
"rewards/frontier_coverage_15": 0.14955383241176606,
|
|
"rewards/frontier_coverage_20": 0.14955383241176606,
|
|
"rewards/frontier_coverage_25": 0.1367091566324234,
|
|
"rewards/frontier_coverage_5": 0.14955383241176606,
|
|
"rewards/frontier_ece_reward": 0.008794736303389072,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12056884765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1548303782939911,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.56875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.060284423828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.060284423828125,
|
|
"signal/advantage_abs_mean": 0.06270648390054703,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06270648390054703,
|
|
"signal/advantage_pre_scale_std": 0.10368855893611909,
|
|
"signal/advantage_std": 0.10368855893611909,
|
|
"signal/brier_reward/centered_abs_mean": 0.14834778904914855,
|
|
"signal/brier_reward/group_std_mean": 0.18951214253902435,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01854347363114357,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01854347363114357,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029853297770023345,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03863080143928528,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003731662221252918,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003731662221252918,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020392842590808867,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003261732868850231,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.65031861292664e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.65031861292664e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21441528499126433,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2732947587966919,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00383803341537714,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00383803341537714,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21441528499126433,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2732947587966919,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00383803341537714,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00383803341537714,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21441528499126433,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2732947587966919,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00383803341537714,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00383803341537714,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21441528499126433,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2732947587966919,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00383803341537714,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00383803341537714,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18751142024993897,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2398875504732132,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033564542420208452,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033564542420208452,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21441528499126433,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2732947587966919,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00383803341537714,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00383803341537714,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.011400581523776054,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.014213207736611366,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014250726904720067,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014250726904720067,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"eval_calibration/aurc": 0.5242910077871237,
|
|
"eval_calibration/batch_distribution_entropy": 0.8875695608329197,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9648809725349352,
|
|
"eval_calibration/confidence_entropy": 0.42631164594353543,
|
|
"eval_calibration/coverage@0%": 0.0390625,
|
|
"eval_calibration/coverage@1%": 0.0390625,
|
|
"eval_calibration/coverage@10%": 0.0390625,
|
|
"eval_calibration/coverage@15%": 0.0390625,
|
|
"eval_calibration/coverage@20%": 0.1171875,
|
|
"eval_calibration/coverage@25%": 0.1328125,
|
|
"eval_calibration/coverage@30%": 0.2265625,
|
|
"eval_calibration/coverage@5%": 0.0390625,
|
|
"eval_calibration/ece": 0.211484375,
|
|
"eval_calibration/mean_confidence": 0.40789062499999995,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 373.25,
|
|
"eval_completions/max_terminated_length": 373.25,
|
|
"eval_completions/mean_length": 217.67988967895508,
|
|
"eval_completions/mean_terminated_length": 217.67988967895508,
|
|
"eval_completions/min_length": 114.5,
|
|
"eval_completions/min_terminated_length": 114.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 505503360.0,
|
|
"eval_reward": 0.947238028049469,
|
|
"eval_reward_std": 0.21095874905586243,
|
|
"eval_rewards/accuracy_reward": 0.416015625,
|
|
"eval_rewards/brier_reward": 0.8082947880029678,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.890625,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.002717375347856432,
|
|
"eval_rewards/frontier_coverage_1": 0.2446550689637661,
|
|
"eval_rewards/frontier_coverage_10": 0.2446550689637661,
|
|
"eval_rewards/frontier_coverage_15": 0.2446550689637661,
|
|
"eval_rewards/frontier_coverage_20": 0.2446550689637661,
|
|
"eval_rewards/frontier_coverage_25": 0.20336830243468285,
|
|
"eval_rewards/frontier_coverage_5": 0.2446550689637661,
|
|
"eval_rewards/frontier_ece_reward": 0.011015785159543157,
|
|
"eval_runtime": 20.0514,
|
|
"eval_samples_per_second": 24.936,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4644775390625,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4889160767197609,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23223876953125,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23223876953125,
|
|
"eval_signal/advantage_abs_mean": 0.19064204022288322,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.19064204022288322,
|
|
"eval_signal/advantage_pre_scale_std": 0.20879125222563744,
|
|
"eval_signal/advantage_std": 0.20879125222563744,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.20391716808080673,
|
|
"eval_signal/brier_reward/group_std_mean": 0.26033516973257065,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02548964601010084,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02548964601010084,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0490570068359375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0586219010874629,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0061321258544921875,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0061321258544921875,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0030126574565656483,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005966346827335656,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3926567488815635e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3926567488815635e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.42714041471481323,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.508579321205616,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00764581304974854,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00764581304974854,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.42714041471481323,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.508579321205616,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00764581304974854,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00764581304974854,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.42714041471481323,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.508579321205616,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00764581304974854,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00764581304974854,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.42714041471481323,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.508579321205616,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00764581304974854,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00764581304974854,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.35514652729034424,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.4233100786805153,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006357122561894357,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006357122561894357,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.42714041471481323,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.508579321205616,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00764581304974854,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00764581304974854,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.015712440479546785,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.01919988915324211,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001964055059943348,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001964055059943348,
|
|
"eval_steps_per_second": 0.199,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3840361950058275,
|
|
"calibration/batch_distribution_entropy": 0.9363928805886245,
|
|
"calibration/buffer_distribution_entropy": 0.96374512597476,
|
|
"calibration/confidence_entropy": 0.41662806096978233,
|
|
"calibration/coverage@0%": 0.011328125,
|
|
"calibration/coverage@1%": 0.011328125,
|
|
"calibration/coverage@10%": 0.128515625,
|
|
"calibration/coverage@15%": 0.150390625,
|
|
"calibration/coverage@20%": 0.1984375,
|
|
"calibration/coverage@25%": 0.233203125,
|
|
"calibration/coverage@30%": 0.2875,
|
|
"calibration/coverage@5%": 0.078515625,
|
|
"calibration/ece": 0.14795585937499997,
|
|
"calibration/mean_confidence": 0.48408632812499997,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 490.8,
|
|
"completions/max_terminated_length": 490.8,
|
|
"completions/mean_length": 216.8044921875,
|
|
"completions/mean_terminated_length": 216.8044921875,
|
|
"completions/min_length": 98.4,
|
|
"completions/min_terminated_length": 98.4,
|
|
"epoch": 0.496,
|
|
"grad_norm": 0.0007564805564470589,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 523031278.0,
|
|
"reward": 1.000865638256073,
|
|
"reward_std": 0.08007181584835052,
|
|
"rewards/accuracy_reward": 0.54189453125,
|
|
"rewards/brier_reward": 0.7783055067062378,
|
|
"rewards/confidence_uniqueness_reward": 0.9507560729980469,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0027518115239217877,
|
|
"rewards/frontier_coverage_1": 0.12289173305034637,
|
|
"rewards/frontier_coverage_10": 0.12289173305034637,
|
|
"rewards/frontier_coverage_15": 0.12289173305034637,
|
|
"rewards/frontier_coverage_20": 0.12289173305034637,
|
|
"rewards/frontier_coverage_25": 0.09934463798999786,
|
|
"rewards/frontier_coverage_5": 0.12289173305034637,
|
|
"rewards/frontier_ece_reward": 0.00846287291496992,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.101702880859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1375451058149338,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0508514404296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0508514404296875,
|
|
"signal/advantage_abs_mean": 0.060508400201797485,
|
|
"signal/advantage_pre_scale_abs_mean": 0.060508400201797485,
|
|
"signal/advantage_pre_scale_std": 0.10368632078170777,
|
|
"signal/advantage_std": 0.10368632078170777,
|
|
"signal/brier_reward/centered_abs_mean": 0.1387119174003601,
|
|
"signal/brier_reward/group_std_mean": 0.17947129905223846,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017338989675045012,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017338989675045012,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023092246055603026,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02924296595156193,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028865307569503782,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028865307569503782,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002348742028698325,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003962589660659432,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.204248070891481e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.204248070891481e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17886387705802917,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23453721702098845,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032016633544117213,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032016633544117213,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17886387705802917,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23453721702098845,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032016633544117213,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032016633544117213,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17886387705802917,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23453721702098845,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032016633544117213,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032016633544117213,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17886387705802917,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23453721702098845,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032016633544117213,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032016633544117213,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14604896903038025,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.19246629774570465,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026142764370888473,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026142764370888473,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17886387705802917,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23453721702098845,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032016633544117213,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032016633544117213,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010641206428408623,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.013527031242847442,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013301508035510779,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013301508035510779,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31820365277463686,
|
|
"calibration/batch_distribution_entropy": 0.955862569184351,
|
|
"calibration/buffer_distribution_entropy": 0.9614048048735244,
|
|
"calibration/confidence_entropy": 0.4472530108210627,
|
|
"calibration/coverage@0%": 0.017578125,
|
|
"calibration/coverage@1%": 0.017578125,
|
|
"calibration/coverage@10%": 0.187890625,
|
|
"calibration/coverage@15%": 0.29140625,
|
|
"calibration/coverage@20%": 0.357421875,
|
|
"calibration/coverage@25%": 0.424609375,
|
|
"calibration/coverage@30%": 0.491015625,
|
|
"calibration/coverage@5%": 0.09296875,
|
|
"calibration/ece": 0.14949122731587602,
|
|
"calibration/mean_confidence": 0.5130281283575426,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 618.8,
|
|
"completions/max_terminated_length": 618.8,
|
|
"completions/mean_length": 213.851953125,
|
|
"completions/mean_terminated_length": 213.851953125,
|
|
"completions/min_length": 102.4,
|
|
"completions/min_terminated_length": 102.4,
|
|
"epoch": 0.512,
|
|
"grad_norm": 0.0008687236113473773,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 540366786.0,
|
|
"reward": 1.0033927321434022,
|
|
"reward_std": 0.08281527161598205,
|
|
"rewards/accuracy_reward": 0.538671875,
|
|
"rewards/brier_reward": 0.7973424553871155,
|
|
"rewards/confidence_uniqueness_reward": 0.9516700744628906,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002556700585409999,
|
|
"rewards/frontier_coverage_1": 0.13710992485284806,
|
|
"rewards/frontier_coverage_10": 0.13710992485284806,
|
|
"rewards/frontier_coverage_15": 0.13710992485284806,
|
|
"rewards/frontier_coverage_20": 0.13710992485284806,
|
|
"rewards/frontier_coverage_25": 0.10893923193216323,
|
|
"rewards/frontier_coverage_5": 0.13710992485284806,
|
|
"rewards/frontier_ece_reward": 0.01003704108297825,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10458984375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1432061731815338,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.575,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052294921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.052294921875,
|
|
"signal/advantage_abs_mean": 0.06263713538646698,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06263713538646698,
|
|
"signal/advantage_pre_scale_std": 0.10937785655260086,
|
|
"signal/advantage_std": 0.10937785655260086,
|
|
"signal/brier_reward/centered_abs_mean": 0.1311786264181137,
|
|
"signal/brier_reward/group_std_mean": 0.16993843913078308,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016397328302264213,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016397328302264213,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023091053962707518,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.029020922258496284,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028863817453384398,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028863817453384398,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022825901862233875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0037255555856972934,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.085836226295214e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.085836226295214e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1627916783094406,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21614238023757934,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002913971059024334,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002913971059024334,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1627916783094406,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21614238023757934,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002913971059024334,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002913971059024334,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1627916783094406,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21614238023757934,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002913971059024334,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002913971059024334,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1627916783094406,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.21614238023757934,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002913971059024334,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002913971059024334,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1238186538219452,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16544671654701232,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022163538727909327,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022163538727909327,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1627916783094406,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21614238023757934,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002913971059024334,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002913971059024334,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010741004720330238,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.013525258377194404,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013426255900412798,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013426255900412798,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20637803832182802,
|
|
"calibration/batch_distribution_entropy": 0.9363854519581405,
|
|
"calibration/buffer_distribution_entropy": 0.9601961260988621,
|
|
"calibration/confidence_entropy": 0.41719762592603954,
|
|
"calibration/coverage@0%": 0.05123685176125244,
|
|
"calibration/coverage@1%": 0.06728381849315068,
|
|
"calibration/coverage@10%": 0.3201450892857143,
|
|
"calibration/coverage@15%": 0.3979008683953033,
|
|
"calibration/coverage@20%": 0.4795690129647749,
|
|
"calibration/coverage@25%": 0.6295980613992171,
|
|
"calibration/coverage@30%": 0.7257208598336595,
|
|
"calibration/coverage@5%": 0.18101608365949118,
|
|
"calibration/ece": 0.10404099756635832,
|
|
"calibration/mean_confidence": 0.5286149885154776,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 956.0,
|
|
"completions/max_terminated_length": 573.8,
|
|
"completions/mean_length": 215.037890625,
|
|
"completions/mean_terminated_length": 214.7794982910156,
|
|
"completions/min_length": 97.0,
|
|
"completions/min_terminated_length": 97.0,
|
|
"epoch": 0.528,
|
|
"grad_norm": 0.0008408619905821979,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0011,
|
|
"num_tokens": 557598310.0,
|
|
"reward": 1.003028893470764,
|
|
"reward_std": 0.08370372354984283,
|
|
"rewards/accuracy_reward": 0.53251953125,
|
|
"rewards/brier_reward": 0.8059515237808228,
|
|
"rewards/confidence_uniqueness_reward": 0.950570797920227,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0023401447338983416,
|
|
"rewards/frontier_coverage_1": 0.15629091411828994,
|
|
"rewards/frontier_coverage_10": 0.15629091411828994,
|
|
"rewards/frontier_coverage_15": 0.15629091411828994,
|
|
"rewards/frontier_coverage_20": 0.15629091411828994,
|
|
"rewards/frontier_coverage_25": 0.11881576627492904,
|
|
"rewards/frontier_coverage_5": 0.15629091411828994,
|
|
"rewards/frontier_ece_reward": 0.010219059139490127,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.117535400390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.14876840710639955,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0587677001953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0587677001953125,
|
|
"signal/advantage_abs_mean": 0.06532718688249588,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06532718688249588,
|
|
"signal/advantage_pre_scale_std": 0.11273082941770554,
|
|
"signal/advantage_std": 0.11273082941770554,
|
|
"signal/brier_reward/centered_abs_mean": 0.12941071391105652,
|
|
"signal/brier_reward/group_std_mean": 0.16719320714473723,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016176339238882065,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016176339238882065,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02397966869175434,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03146580345928669,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029974585864692926,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029974585864692926,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002146564261056483,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0033703493420034645,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.842349833576009e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.842349833576009e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1766491413116455,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22845688462257385,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003162019606679678,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003162019606679678,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1766491413116455,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22845688462257385,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003162019606679678,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003162019606679678,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1766491413116455,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22845688462257385,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003162019606679678,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003162019606679678,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1766491413116455,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22845688462257385,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003162019606679678,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003162019606679678,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1280095487833023,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16677605509757995,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022913708817213774,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022913708817213774,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1766491413116455,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22845688462257385,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003162019606679678,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003162019606679678,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010062118992209435,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01266515776515007,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012577648740261793,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012577648740261793,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22990942715224433,
|
|
"calibration/batch_distribution_entropy": 0.9086932295556744,
|
|
"calibration/buffer_distribution_entropy": 0.9580016627959249,
|
|
"calibration/confidence_entropy": 0.3994817622971429,
|
|
"calibration/coverage@0%": 0.0140625,
|
|
"calibration/coverage@1%": 0.0140625,
|
|
"calibration/coverage@10%": 0.201953125,
|
|
"calibration/coverage@15%": 0.3609375,
|
|
"calibration/coverage@20%": 0.490625,
|
|
"calibration/coverage@25%": 0.58984375,
|
|
"calibration/coverage@30%": 0.70625,
|
|
"calibration/coverage@5%": 0.094921875,
|
|
"calibration/ece": 0.08248207063576382,
|
|
"calibration/mean_confidence": 0.5646070706357639,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 532.0,
|
|
"completions/max_terminated_length": 532.0,
|
|
"completions/mean_length": 219.7181640625,
|
|
"completions/mean_terminated_length": 219.7181640625,
|
|
"completions/min_length": 101.4,
|
|
"completions/min_terminated_length": 101.4,
|
|
"epoch": 0.544,
|
|
"grad_norm": 0.000887208734638989,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 575011808.0,
|
|
"reward": 1.010369873046875,
|
|
"reward_std": 0.0878099650144577,
|
|
"rewards/accuracy_reward": 0.56220703125,
|
|
"rewards/brier_reward": 0.7859106302261353,
|
|
"rewards/confidence_uniqueness_reward": 0.9532241821289062,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002733378019183874,
|
|
"rewards/frontier_coverage_1": 0.10622020661830903,
|
|
"rewards/frontier_coverage_10": 0.10622020661830903,
|
|
"rewards/frontier_coverage_15": 0.10622020661830903,
|
|
"rewards/frontier_coverage_20": 0.10622020661830903,
|
|
"rewards/frontier_coverage_25": 0.07521467357873916,
|
|
"rewards/frontier_coverage_5": 0.10622020661830903,
|
|
"rewards/frontier_ece_reward": 0.008562875911593437,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.122222900390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.16073189973831176,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.54375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0611114501953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0611114501953125,
|
|
"signal/advantage_abs_mean": 0.06658549755811691,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06658549755811691,
|
|
"signal/advantage_pre_scale_std": 0.11341958940029144,
|
|
"signal/advantage_std": 0.11341958940029144,
|
|
"signal/brier_reward/centered_abs_mean": 0.13747948110103608,
|
|
"signal/brier_reward/group_std_mean": 0.17746234834194183,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01718493513762951,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01718493513762951,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02245485782623291,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.028442315012216567,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028068572282791138,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028068572282791138,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002469687769189477,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038645863067358734,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.420740733621642e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.420740733621642e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17635051608085633,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23008197844028472,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031566740944981575,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031566740944981575,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17635051608085633,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23008197844028472,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031566740944981575,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031566740944981575,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17635051608085633,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23008197844028472,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031566740944981575,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031566740944981575,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17635051608085633,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23008197844028472,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031566740944981575,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031566740944981575,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11544786989688874,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15155554413795472,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020665168296545742,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020665168296545742,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17635051608085633,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23008197844028472,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031566740944981575,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031566740944981575,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010545129328966141,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.013178185001015663,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013181411661207676,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013181411661207676,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25756737417173714,
|
|
"calibration/batch_distribution_entropy": 0.9384644007235714,
|
|
"calibration/buffer_distribution_entropy": 0.956561369285882,
|
|
"calibration/confidence_entropy": 0.41747994135409827,
|
|
"calibration/coverage@0%": 0.06017612524461839,
|
|
"calibration/coverage@1%": 0.1062698752446184,
|
|
"calibration/coverage@10%": 0.2594674963307241,
|
|
"calibration/coverage@15%": 0.35207696306262226,
|
|
"calibration/coverage@20%": 0.3985942086594912,
|
|
"calibration/coverage@25%": 0.47088353106653624,
|
|
"calibration/coverage@30%": 0.6209072284735813,
|
|
"calibration/coverage@5%": 0.2000535102739726,
|
|
"calibration/ece": 0.119488073066949,
|
|
"calibration/mean_confidence": 0.5228720969428358,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 1030.0,
|
|
"completions/max_terminated_length": 619.8,
|
|
"completions/mean_length": 221.8912109375,
|
|
"completions/mean_terminated_length": 221.6349365234375,
|
|
"completions/min_length": 101.4,
|
|
"completions/min_terminated_length": 101.4,
|
|
"epoch": 0.56,
|
|
"grad_norm": 0.0007872599526308477,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 592105382.0,
|
|
"reward": 0.9981863141059876,
|
|
"reward_std": 0.07935396581888199,
|
|
"rewards/accuracy_reward": 0.52373046875,
|
|
"rewards/brier_reward": 0.8048706293106079,
|
|
"rewards/confidence_uniqueness_reward": 0.9511743545532226,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0027451789472252132,
|
|
"rewards/frontier_coverage_1": 0.1567632645368576,
|
|
"rewards/frontier_coverage_10": 0.1567632645368576,
|
|
"rewards/frontier_coverage_15": 0.1567632645368576,
|
|
"rewards/frontier_coverage_20": 0.1529286891222,
|
|
"rewards/frontier_coverage_25": 0.10326671600341797,
|
|
"rewards/frontier_coverage_5": 0.1567632645368576,
|
|
"rewards/frontier_ece_reward": 0.009607139974832535,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.091827392578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12912326753139497,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.603125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0459136962890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0459136962890625,
|
|
"signal/advantage_abs_mean": 0.05798099935054779,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05798099935054779,
|
|
"signal/advantage_pre_scale_std": 0.10438774973154068,
|
|
"signal/advantage_std": 0.10438774973154068,
|
|
"signal/brier_reward/centered_abs_mean": 0.13308307528495789,
|
|
"signal/brier_reward/group_std_mean": 0.17416214644908906,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016635384410619736,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016635384410619736,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024077710509300233,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03139141947031021,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003009713813662529,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003009713813662529,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002499508252367377,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004307471588253975,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4741196325048806e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4741196325048806e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16583755910396575,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21883132457733154,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029684923123568297,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029684923123568297,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16583755910396575,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21883132457733154,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029684923123568297,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029684923123568297,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16583755910396575,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21883132457733154,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029684923123568297,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029684923123568297,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16073502898216246,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2121128112077713,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028771569021046163,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028771569021046163,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1035612627863884,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13709462583065032,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018537465017288922,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018537465017288922,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16583755910396575,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21883132457733154,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029684923123568297,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029684923123568297,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009674718603491783,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012150035984814168,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001209339825436473,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001209339825436473,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30044665717816926,
|
|
"calibration/batch_distribution_entropy": 0.9354879991939942,
|
|
"calibration/buffer_distribution_entropy": 0.9569462730506967,
|
|
"calibration/confidence_entropy": 0.4156559235153006,
|
|
"calibration/coverage@0%": 0.028567606409001956,
|
|
"calibration/coverage@1%": 0.028567606409001956,
|
|
"calibration/coverage@10%": 0.1552845217710372,
|
|
"calibration/coverage@15%": 0.2494557240704501,
|
|
"calibration/coverage@20%": 0.32019019080234834,
|
|
"calibration/coverage@25%": 0.40653971991193744,
|
|
"calibration/coverage@30%": 0.5010977250489237,
|
|
"calibration/coverage@5%": 0.061052929305283755,
|
|
"calibration/ece": 0.09349101345519373,
|
|
"calibration/mean_confidence": 0.5241195313734912,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1106.0,
|
|
"completions/max_terminated_length": 509.2,
|
|
"completions/mean_length": 225.293359375,
|
|
"completions/mean_terminated_length": 224.78112182617187,
|
|
"completions/min_length": 103.8,
|
|
"completions/min_terminated_length": 103.8,
|
|
"epoch": 0.576,
|
|
"grad_norm": 0.0006911300006322563,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0012,
|
|
"num_tokens": 609599010.0,
|
|
"reward": 0.9900464415550232,
|
|
"reward_std": 0.0712356612086296,
|
|
"rewards/accuracy_reward": 0.51416015625,
|
|
"rewards/brier_reward": 0.7882812857627869,
|
|
"rewards/confidence_uniqueness_reward": 0.949445104598999,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0031779037788510324,
|
|
"rewards/frontier_coverage_1": 0.14884960055351257,
|
|
"rewards/frontier_coverage_10": 0.14884960055351257,
|
|
"rewards/frontier_coverage_15": 0.14884960055351257,
|
|
"rewards/frontier_coverage_20": 0.14380609542131423,
|
|
"rewards/frontier_coverage_25": 0.09822984933853149,
|
|
"rewards/frontier_coverage_5": 0.14884960055351257,
|
|
"rewards/frontier_ece_reward": 0.008101735450327396,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.080340576171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1135935753583908,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0401702880859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0401702880859375,
|
|
"signal/advantage_abs_mean": 0.052441304177045824,
|
|
"signal/advantage_pre_scale_abs_mean": 0.052441304177045824,
|
|
"signal/advantage_pre_scale_std": 0.09616598784923554,
|
|
"signal/advantage_std": 0.09616598784923554,
|
|
"signal/brier_reward/centered_abs_mean": 0.12765369713306426,
|
|
"signal/brier_reward/group_std_mean": 0.16434176564216613,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015956712141633033,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015956712141633033,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02433336600661278,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.032431261241436006,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030416707508265973,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030416707508265973,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002604444185271859,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004322615498676896,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6619550994364543e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6619550994364543e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16047695577144622,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20908625721931456,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002872537402436137,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002872537402436137,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16047695577144622,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20908625721931456,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002872537402436137,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002872537402436137,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16047695577144622,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20908625721931456,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002872537402436137,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002872537402436137,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15487791895866393,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20193188786506652,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027723145671188832,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027723145671188832,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10067833811044694,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13129209876060485,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018021421507000922,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018021421507000922,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16047695577144622,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20908625721931456,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002872537402436137,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002872537402436137,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00897240024060011,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011299080029129983,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011215500300750137,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011215500300750137,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3020793610970498,
|
|
"calibration/batch_distribution_entropy": 0.9420785344993752,
|
|
"calibration/buffer_distribution_entropy": 0.9564569849512502,
|
|
"calibration/confidence_entropy": 0.4208254895055754,
|
|
"calibration/coverage@0%": 0.01875,
|
|
"calibration/coverage@1%": 0.01875,
|
|
"calibration/coverage@10%": 0.18203125,
|
|
"calibration/coverage@15%": 0.294921875,
|
|
"calibration/coverage@20%": 0.446875,
|
|
"calibration/coverage@25%": 0.508984375,
|
|
"calibration/coverage@30%": 0.58203125,
|
|
"calibration/coverage@5%": 0.11015625,
|
|
"calibration/ece": 0.147133828125,
|
|
"calibration/mean_confidence": 0.5008921875000001,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 701.4,
|
|
"completions/max_terminated_length": 701.4,
|
|
"completions/mean_length": 226.38173828125,
|
|
"completions/mean_terminated_length": 226.38173828125,
|
|
"completions/min_length": 103.8,
|
|
"completions/min_terminated_length": 103.8,
|
|
"epoch": 0.592,
|
|
"grad_norm": 0.0008963316213339567,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 627084871.0,
|
|
"reward": 0.9920501828193664,
|
|
"reward_std": 0.07705037146806717,
|
|
"rewards/accuracy_reward": 0.51953125,
|
|
"rewards/brier_reward": 0.7879927754402161,
|
|
"rewards/confidence_uniqueness_reward": 0.9459659218788147,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0027553184889256956,
|
|
"rewards/frontier_coverage_1": 0.14658626317977905,
|
|
"rewards/frontier_coverage_10": 0.14658626317977905,
|
|
"rewards/frontier_coverage_15": 0.14658626317977905,
|
|
"rewards/frontier_coverage_20": 0.1375451296567917,
|
|
"rewards/frontier_coverage_25": 0.0929755374789238,
|
|
"rewards/frontier_coverage_5": 0.14658626317977905,
|
|
"rewards/frontier_ece_reward": 0.008127694483846426,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10269775390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.13930981755256652,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.584375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051348876953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051348876953125,
|
|
"signal/advantage_abs_mean": 0.05754327178001404,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05754327178001404,
|
|
"signal/advantage_pre_scale_std": 0.10225750654935836,
|
|
"signal/advantage_std": 0.10225750654935836,
|
|
"signal/brier_reward/centered_abs_mean": 0.12910159975290297,
|
|
"signal/brier_reward/group_std_mean": 0.16693442165851594,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01613769996911287,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01613769996911287,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02787396013736725,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03567564189434051,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003484245017170906,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003484245017170906,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002311707567423582,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038075320888310673,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.137956348131411e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.137956348131411e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17492155730724335,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22647031247615815,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003131095739081502,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003131095739081502,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17492155730724335,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22647031247615815,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003131095739081502,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003131095739081502,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17492155730724335,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22647031247615815,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003131095739081502,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003131095739081502,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16214913129806519,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.21021865606307982,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029024692717939614,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029024692717939614,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10388463884592056,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13509499579668044,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018595349509268999,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018595349509268999,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17492155730724335,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22647031247615815,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003131095739081502,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003131095739081502,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008527814783155918,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010785996168851852,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010659768478944898,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010659768478944898,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21089239176890442,
|
|
"calibration/batch_distribution_entropy": 0.9109937668709096,
|
|
"calibration/buffer_distribution_entropy": 0.9564261109380905,
|
|
"calibration/confidence_entropy": 0.3950107808886204,
|
|
"calibration/coverage@0%": 0.0734375,
|
|
"calibration/coverage@1%": 0.076953125,
|
|
"calibration/coverage@10%": 0.319140625,
|
|
"calibration/coverage@15%": 0.424609375,
|
|
"calibration/coverage@20%": 0.533203125,
|
|
"calibration/coverage@25%": 0.6421875,
|
|
"calibration/coverage@30%": 0.71328125,
|
|
"calibration/coverage@5%": 0.214453125,
|
|
"calibration/ece": 0.10328154428114192,
|
|
"calibration/mean_confidence": 0.4780747057188581,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 706.4,
|
|
"completions/max_terminated_length": 508.2,
|
|
"completions/mean_length": 229.04130859375,
|
|
"completions/mean_terminated_length": 228.91400146484375,
|
|
"completions/min_length": 110.6,
|
|
"completions/min_terminated_length": 110.6,
|
|
"epoch": 0.608,
|
|
"grad_norm": 0.000727064092643559,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 644429742.0,
|
|
"reward": 0.9991624474525451,
|
|
"reward_std": 0.0650356225669384,
|
|
"rewards/accuracy_reward": 0.517578125,
|
|
"rewards/brier_reward": 0.82053302526474,
|
|
"rewards/confidence_uniqueness_reward": 0.9456512808799744,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0022411981131881475,
|
|
"rewards/frontier_coverage_1": 0.18771364390850068,
|
|
"rewards/frontier_coverage_10": 0.18771364390850068,
|
|
"rewards/frontier_coverage_15": 0.18771364390850068,
|
|
"rewards/frontier_coverage_20": 0.17165526747703552,
|
|
"rewards/frontier_coverage_25": 0.1158706158399582,
|
|
"rewards/frontier_coverage_5": 0.18771364390850068,
|
|
"rewards/frontier_ece_reward": 0.00920899622142315,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0880126953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12031828612089157,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04400634765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04400634765625,
|
|
"signal/advantage_abs_mean": 0.0479205884039402,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0479205884039402,
|
|
"signal/advantage_pre_scale_std": 0.08914662450551987,
|
|
"signal/advantage_std": 0.08914662450551987,
|
|
"signal/brier_reward/centered_abs_mean": 0.1190482184290886,
|
|
"signal/brier_reward/group_std_mean": 0.154274782538414,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014881027303636074,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014881027303636074,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026865917071700097,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03425132632255554,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003358239633962512,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003358239633962512,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001823417330160737,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0030636247247457504,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.2639168421155776e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.2639168421155776e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1752112627029419,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.224535670876503,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031362815760076048,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031362815760076048,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1752112627029419,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.224535670876503,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031362815760076048,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031362815760076048,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1752112627029419,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.224535670876503,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031362815760076048,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031362815760076048,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15818937122821808,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20266130268573762,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028315896168351175,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028315896168351175,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10065800696611404,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12867961823940277,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018017783062532545,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018017783062532545,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1752112627029419,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.224535670876503,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031362815760076048,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031362815760076048,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00798153392970562,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009954988211393356,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009976917412132026,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009976917412132026,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24885868636965697,
|
|
"calibration/batch_distribution_entropy": 0.953152157528556,
|
|
"calibration/buffer_distribution_entropy": 0.9558545475314965,
|
|
"calibration/confidence_entropy": 0.43321819447817606,
|
|
"calibration/coverage@0%": 0.0109375,
|
|
"calibration/coverage@1%": 0.0109375,
|
|
"calibration/coverage@10%": 0.09375,
|
|
"calibration/coverage@15%": 0.31640625,
|
|
"calibration/coverage@20%": 0.433984375,
|
|
"calibration/coverage@25%": 0.559765625,
|
|
"calibration/coverage@30%": 0.64765625,
|
|
"calibration/coverage@5%": 0.024609375,
|
|
"calibration/ece": 0.10506215171912128,
|
|
"calibration/mean_confidence": 0.4945223541320223,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 982.2,
|
|
"completions/max_terminated_length": 773.0,
|
|
"completions/mean_length": 233.69580078125,
|
|
"completions/mean_terminated_length": 233.56867065429688,
|
|
"completions/min_length": 113.2,
|
|
"completions/min_terminated_length": 113.2,
|
|
"epoch": 0.624,
|
|
"grad_norm": 0.0008362337248399854,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 662166691.0,
|
|
"reward": 1.0022923231124878,
|
|
"reward_std": 0.07647469788789749,
|
|
"rewards/accuracy_reward": 0.5296875,
|
|
"rewards/brier_reward": 0.8107707142829895,
|
|
"rewards/confidence_uniqueness_reward": 0.9529690384864807,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0024412672501057386,
|
|
"rewards/frontier_coverage_1": 0.16334131360054016,
|
|
"rewards/frontier_coverage_10": 0.16334131360054016,
|
|
"rewards/frontier_coverage_15": 0.16334131360054016,
|
|
"rewards/frontier_coverage_20": 0.15014731287956237,
|
|
"rewards/frontier_coverage_25": 0.0971300944685936,
|
|
"rewards/frontier_coverage_5": 0.16334131360054016,
|
|
"rewards/frontier_ece_reward": 0.007617098093032837,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.103271484375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1352292686700821,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0516357421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0516357421875,
|
|
"signal/advantage_abs_mean": 0.058700380474328996,
|
|
"signal/advantage_pre_scale_abs_mean": 0.058700380474328996,
|
|
"signal/advantage_pre_scale_std": 0.10284461975097656,
|
|
"signal/advantage_std": 0.10284461975097656,
|
|
"signal/brier_reward/centered_abs_mean": 0.1295202523469925,
|
|
"signal/brier_reward/group_std_mean": 0.16612099409103392,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01619003154337406,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01619003154337406,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022126782685518265,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.028393551334738733,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002765847835689783,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002765847835689783,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020905883517116307,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003407838987186551,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.742152948689181e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.742152948689181e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17640204429626466,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22737206220626832,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003157596383243799,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003157596383243799,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17640204429626466,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22737206220626832,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003157596383243799,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003157596383243799,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17640204429626466,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22737206220626832,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003157596383243799,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003157596383243799,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15503880083560945,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20045762360095978,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002775194449350238,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002775194449350238,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09230681955814361,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12007313668727874,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016522919991984963,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016522919991984963,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17640204429626466,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22737206220626832,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003157596383243799,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003157596383243799,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0074423874728381635,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009387831203639507,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009302984341047704,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009302984341047704,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27366786661512593,
|
|
"calibration/batch_distribution_entropy": 0.9267823537685029,
|
|
"calibration/buffer_distribution_entropy": 0.9562173422993376,
|
|
"calibration/confidence_entropy": 0.43266101942321056,
|
|
"calibration/coverage@0%": 0.033594514432485324,
|
|
"calibration/coverage@1%": 0.05586013943248532,
|
|
"calibration/coverage@10%": 0.2957038894324853,
|
|
"calibration/coverage@15%": 0.3343757644324853,
|
|
"calibration/coverage@20%": 0.4261726394324853,
|
|
"calibration/coverage@25%": 0.4910163894324853,
|
|
"calibration/coverage@30%": 0.5804695144324853,
|
|
"calibration/coverage@5%": 0.2148445144324853,
|
|
"calibration/ece": 0.1776985742252802,
|
|
"calibration/mean_confidence": 0.5747919124483183,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 1343.6,
|
|
"completions/max_terminated_length": 643.4,
|
|
"completions/mean_length": 236.0283203125,
|
|
"completions/mean_terminated_length": 235.26619873046874,
|
|
"completions/min_length": 111.2,
|
|
"completions/min_terminated_length": 111.2,
|
|
"epoch": 0.64,
|
|
"grad_norm": 0.0007717712433077395,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0015,
|
|
"num_tokens": 679926309.0,
|
|
"reward": 1.0139172077178955,
|
|
"reward_std": 0.073719322681427,
|
|
"rewards/accuracy_reward": 0.56474609375,
|
|
"rewards/brier_reward": 0.801562488079071,
|
|
"rewards/confidence_uniqueness_reward": 0.9502385973930358,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.002655105572193861,
|
|
"rewards/frontier_coverage_1": 0.12261157184839248,
|
|
"rewards/frontier_coverage_10": 0.12261157184839248,
|
|
"rewards/frontier_coverage_15": 0.12261157184839248,
|
|
"rewards/frontier_coverage_20": 0.10896739810705185,
|
|
"rewards/frontier_coverage_25": 0.07364076673984528,
|
|
"rewards/frontier_coverage_5": 0.12261157184839248,
|
|
"rewards/frontier_ece_reward": 0.007285200804471969,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.089215087890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11973312497138977,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0446075439453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0446075439453125,
|
|
"signal/advantage_abs_mean": 0.05503489300608635,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05503489300608635,
|
|
"signal/advantage_pre_scale_std": 0.10273861885070801,
|
|
"signal/advantage_std": 0.10273861885070801,
|
|
"signal/brier_reward/centered_abs_mean": 0.121430304646492,
|
|
"signal/brier_reward/group_std_mean": 0.15940046012401582,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0151787880808115,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0151787880808115,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024930314719676973,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03353976756334305,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031162893399596216,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031162893399596216,
|
|
"signal/format_reward/centered_abs_mean": 0.001312255859375,
|
|
"signal/format_reward/group_std_mean": 0.0035306816454976795,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025112688075751067,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0040036571677774194,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.495171051530633e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.495171051530633e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14729901850223542,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1960638076066971,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002636652393266559,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002636652393266559,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14729901850223542,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1960638076066971,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002636652393266559,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002636652393266559,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14729901850223542,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1960638076066971,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002636652393266559,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002636652393266559,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1258530229330063,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16801635324954986,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022527690045535563,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022527690045535563,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07607003599405289,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10135896503925323,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013616536045446992,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013616536045446992,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14729901850223542,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1960638076066971,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002636652393266559,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002636652393266559,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00694213742390275,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.008880362659692765,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008677671779878437,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008677671779878437,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"eval_calibration/aurc": 0.4512389389889342,
|
|
"eval_calibration/batch_distribution_entropy": 0.8900863453854657,
|
|
"eval_calibration/buffer_distribution_entropy": 0.956610375342669,
|
|
"eval_calibration/confidence_entropy": 0.41897089379700764,
|
|
"eval_calibration/coverage@0%": 0.046875,
|
|
"eval_calibration/coverage@1%": 0.046875,
|
|
"eval_calibration/coverage@10%": 0.046875,
|
|
"eval_calibration/coverage@15%": 0.046875,
|
|
"eval_calibration/coverage@20%": 0.078125,
|
|
"eval_calibration/coverage@25%": 0.234375,
|
|
"eval_calibration/coverage@30%": 0.296875,
|
|
"eval_calibration/coverage@5%": 0.046875,
|
|
"eval_calibration/ece": 0.22234375,
|
|
"eval_calibration/mean_confidence": 0.50015625,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 424.25,
|
|
"eval_completions/max_terminated_length": 424.25,
|
|
"eval_completions/mean_length": 236.86260604858398,
|
|
"eval_completions/mean_terminated_length": 236.86260604858398,
|
|
"eval_completions/min_length": 140.25,
|
|
"eval_completions/min_terminated_length": 140.25,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 679926309.0,
|
|
"eval_reward": 0.945796549320221,
|
|
"eval_reward_std": 0.23248660191893578,
|
|
"eval_rewards/accuracy_reward": 0.42578125,
|
|
"eval_rewards/brier_reward": 0.7914303839206696,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.900390625,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0042260364862158895,
|
|
"eval_rewards/frontier_coverage_1": 0.2153126746416092,
|
|
"eval_rewards/frontier_coverage_10": 0.2153126746416092,
|
|
"eval_rewards/frontier_coverage_15": 0.2153126746416092,
|
|
"eval_rewards/frontier_coverage_20": 0.18361878022551537,
|
|
"eval_rewards/frontier_coverage_25": 0.10805939510464668,
|
|
"eval_rewards/frontier_coverage_5": 0.2153126746416092,
|
|
"eval_rewards/frontier_ece_reward": 0.006932097370736301,
|
|
"eval_runtime": 22.0641,
|
|
"eval_samples_per_second": 22.661,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.476318359375,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4956294521689415,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2381591796875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2381591796875,
|
|
"eval_signal/advantage_abs_mean": 0.21480223909020424,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21480223909020424,
|
|
"eval_signal/advantage_pre_scale_std": 0.2299434170126915,
|
|
"eval_signal/advantage_std": 0.2299434170126915,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2213696613907814,
|
|
"eval_signal/brier_reward/group_std_mean": 0.27776212990283966,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027671207673847675,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.027671207673847675,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0403900146484375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04716748744249344,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0050487518310546875,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0050487518310546875,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0055589916300959885,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.010654692072421312,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.950594630936394e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.950594630936394e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.37191175669431686,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4545610621571541,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006657220306806266,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006657220306806266,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.37191175669431686,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4545610621571541,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006657220306806266,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006657220306806266,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.37191175669431686,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4545610621571541,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006657220306806266,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006657220306806266,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3148370534181595,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.3863198012113571,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005635583191178739,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005635583191178739,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.17110588029026985,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.21621626242995262,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030627950909547508,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030627950909547508,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.37191175669431686,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4545610621571541,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006657220306806266,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006657220306806266,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.011155220679938793,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.013499156106263399,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013944025849923491,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013944025849923491,
|
|
"eval_steps_per_second": 0.181,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4215608850322955,
|
|
"calibration/batch_distribution_entropy": 0.9583672438505666,
|
|
"calibration/buffer_distribution_entropy": 0.9569867028799818,
|
|
"calibration/confidence_entropy": 0.4560377670341033,
|
|
"calibration/coverage@0%": 0.000390625,
|
|
"calibration/coverage@1%": 0.000390625,
|
|
"calibration/coverage@10%": 0.000390625,
|
|
"calibration/coverage@15%": 0.039093077299412915,
|
|
"calibration/coverage@20%": 0.11807424168297456,
|
|
"calibration/coverage@25%": 0.17905913649706456,
|
|
"calibration/coverage@30%": 0.2486217282289628,
|
|
"calibration/coverage@5%": 0.000390625,
|
|
"calibration/ece": 0.1416412763258362,
|
|
"calibration/mean_confidence": 0.527144841838013,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 738.6,
|
|
"completions/max_terminated_length": 543.8,
|
|
"completions/mean_length": 234.63759765625,
|
|
"completions/mean_terminated_length": 234.51047973632814,
|
|
"completions/min_length": 108.2,
|
|
"completions/min_terminated_length": 108.2,
|
|
"epoch": 0.656,
|
|
"grad_norm": 0.0008832156891003251,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 697185542.0,
|
|
"reward": 0.9804197549819946,
|
|
"reward_std": 0.07824952602386474,
|
|
"rewards/accuracy_reward": 0.49775390625,
|
|
"rewards/brier_reward": 0.780112111568451,
|
|
"rewards/confidence_uniqueness_reward": 0.9548117399215699,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0036599119659513233,
|
|
"rewards/frontier_coverage_1": 0.14614389687776566,
|
|
"rewards/frontier_coverage_10": 0.14614389687776566,
|
|
"rewards/frontier_coverage_15": 0.14614389687776566,
|
|
"rewards/frontier_coverage_20": 0.12369791716337204,
|
|
"rewards/frontier_coverage_25": 0.07793211117386818,
|
|
"rewards/frontier_coverage_5": 0.14614389687776566,
|
|
"rewards/frontier_ece_reward": 0.006139390263706445,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.096759033203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12834607511758805,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0483795166015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0483795166015625,
|
|
"signal/advantage_abs_mean": 0.06001611351966858,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06001611351966858,
|
|
"signal/advantage_pre_scale_std": 0.107490074634552,
|
|
"signal/advantage_std": 0.107490074634552,
|
|
"signal/brier_reward/centered_abs_mean": 0.13132331371307374,
|
|
"signal/brier_reward/group_std_mean": 0.16742262840270997,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016415414214134217,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016415414214134217,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020241304486989974,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0267801396548748,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002530163060873747,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002530163060873747,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032942437566816805,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005665683187544346,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.8966961660189554e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.8966961660189554e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15632550716400145,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2049040824174881,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002798226475715637,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002798226475715637,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15632550716400145,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2049040824174881,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002798226475715637,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002798226475715637,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15632550716400145,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2049040824174881,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002798226475715637,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002798226475715637,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13002455830574036,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.17081713378429414,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023274393985047936,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023274393985047936,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07792463153600693,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10254463851451874,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001394850853830576,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001394850853830576,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15632550716400145,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2049040824174881,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002798226475715637,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002798226475715637,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00685331979766488,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00863857101649046,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00085666497470811,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00085666497470811,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2770882140684193,
|
|
"calibration/batch_distribution_entropy": 0.9175148206245929,
|
|
"calibration/buffer_distribution_entropy": 0.9586387820511565,
|
|
"calibration/confidence_entropy": 0.41085941712176605,
|
|
"calibration/coverage@0%": 0.02890625,
|
|
"calibration/coverage@1%": 0.052734375,
|
|
"calibration/coverage@10%": 0.158203125,
|
|
"calibration/coverage@15%": 0.2578125,
|
|
"calibration/coverage@20%": 0.32734375,
|
|
"calibration/coverage@25%": 0.405859375,
|
|
"calibration/coverage@30%": 0.5078125,
|
|
"calibration/coverage@5%": 0.070703125,
|
|
"calibration/ece": 0.15999164143747102,
|
|
"calibration/mean_confidence": 0.5309644159883717,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 772.2,
|
|
"completions/max_terminated_length": 588.8,
|
|
"completions/mean_length": 234.79482421875,
|
|
"completions/mean_terminated_length": 234.66750793457032,
|
|
"completions/min_length": 108.6,
|
|
"completions/min_terminated_length": 108.6,
|
|
"epoch": 0.672,
|
|
"grad_norm": 0.0007301148143596947,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 714503281.0,
|
|
"reward": 0.9924185633659363,
|
|
"reward_std": 0.07214737236499787,
|
|
"rewards/accuracy_reward": 0.5154296875,
|
|
"rewards/brier_reward": 0.7984122276306153,
|
|
"rewards/confidence_uniqueness_reward": 0.9413475751876831,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0031860186252743007,
|
|
"rewards/frontier_coverage_1": 0.1705209881067276,
|
|
"rewards/frontier_coverage_10": 0.1705209881067276,
|
|
"rewards/frontier_coverage_15": 0.1705209881067276,
|
|
"rewards/frontier_coverage_20": 0.1439062923192978,
|
|
"rewards/frontier_coverage_25": 0.09587938338518143,
|
|
"rewards/frontier_coverage_5": 0.1705209881067276,
|
|
"rewards/frontier_ece_reward": 0.007095560338348151,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09951171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.130070324242115,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049755859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049755859375,
|
|
"signal/advantage_abs_mean": 0.054243403673172,
|
|
"signal/advantage_pre_scale_abs_mean": 0.054243403673172,
|
|
"signal/advantage_pre_scale_std": 0.09895178079605102,
|
|
"signal/advantage_std": 0.09895178079605102,
|
|
"signal/brier_reward/centered_abs_mean": 0.12464683204889297,
|
|
"signal/brier_reward/group_std_mean": 0.16111274659633637,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015580854006111622,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015580854006111622,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029499797150492668,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03853048831224441,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036874746438115835,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036874746438115835,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002818222576752305,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004558488540351391,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.044618155807257e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.044618155807257e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16789944767951964,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21639321148395538,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030053998343646526,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030053998343646526,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16789944767951964,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21639321148395538,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030053998343646526,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030053998343646526,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16789944767951964,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21639321148395538,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030053998343646526,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030053998343646526,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13216465711593628,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1715537875890732,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023657473269850017,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023657473269850017,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08391138613224029,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10859294980764389,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015020138118416071,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015020138118416071,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16789944767951964,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21639321148395538,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030053998343646526,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030053998343646526,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006560100056231022,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.008255177177488804,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008200125070288777,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008200125070288777,
|
|
"step": 210
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3324319783208281,
|
|
"calibration/batch_distribution_entropy": 0.9320277946427449,
|
|
"calibration/buffer_distribution_entropy": 0.9584549993018816,
|
|
"calibration/confidence_entropy": 0.42771008132219396,
|
|
"calibration/coverage@0%": 0.01917196673189824,
|
|
"calibration/coverage@1%": 0.01917196673189824,
|
|
"calibration/coverage@10%": 0.09771052470645793,
|
|
"calibration/coverage@15%": 0.1715569960861057,
|
|
"calibration/coverage@20%": 0.2720125978473581,
|
|
"calibration/coverage@25%": 0.4049130075831703,
|
|
"calibration/coverage@30%": 0.6006367722602739,
|
|
"calibration/coverage@5%": 0.01917196673189824,
|
|
"calibration/ece": 0.14269236415772915,
|
|
"calibration/mean_confidence": 0.507278526168631,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 735.4,
|
|
"completions/max_terminated_length": 545.2,
|
|
"completions/mean_length": 236.37587890625,
|
|
"completions/mean_terminated_length": 236.2491943359375,
|
|
"completions/min_length": 109.8,
|
|
"completions/min_terminated_length": 109.8,
|
|
"epoch": 0.688,
|
|
"grad_norm": 0.0009615990566089749,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 731877690.0,
|
|
"reward": 0.9984089136123657,
|
|
"reward_std": 0.07327373176813126,
|
|
"rewards/accuracy_reward": 0.528515625,
|
|
"rewards/brier_reward": 0.800303041934967,
|
|
"rewards/confidence_uniqueness_reward": 0.9471487998962402,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0026645056437700986,
|
|
"rewards/frontier_coverage_1": 0.1580977201461792,
|
|
"rewards/frontier_coverage_10": 0.1580977201461792,
|
|
"rewards/frontier_coverage_15": 0.1580977201461792,
|
|
"rewards/frontier_coverage_20": 0.12280210703611374,
|
|
"rewards/frontier_coverage_25": 0.08494900315999984,
|
|
"rewards/frontier_coverage_5": 0.1580977201461792,
|
|
"rewards/frontier_ece_reward": 0.006611639633774757,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10098876953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1342798501253128,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.050494384765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.050494384765625,
|
|
"signal/advantage_abs_mean": 0.055242912471294404,
|
|
"signal/advantage_pre_scale_abs_mean": 0.055242912471294404,
|
|
"signal/advantage_pre_scale_std": 0.10234064608812332,
|
|
"signal/advantage_std": 0.10234064608812332,
|
|
"signal/brier_reward/centered_abs_mean": 0.12077709585428238,
|
|
"signal/brier_reward/group_std_mean": 0.15675756931304932,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015097136981785298,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015097136981785298,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025125860422849654,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.032760153710842135,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003140732552856207,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003140732552856207,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002273104293271899,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0036782562732696534,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.068856578669511e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.068856578669511e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16195787191390992,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20971050560474397,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002899045730009675,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002899045730009675,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16195787191390992,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20971050560474397,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002899045730009675,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002899045730009675,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16195787191390992,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20971050560474397,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002899045730009675,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002899045730009675,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.12282232344150543,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1596350461244583,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021985195111483336,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021985195111483336,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08016574680805207,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10362372994422912,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014349668985232712,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014349668985232712,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16195787191390992,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20971050560474397,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002899045730009675,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002899045730009675,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006114064436405897,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007758526690304279,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007642580545507372,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007642580545507372,
|
|
"step": 215
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26502404014152814,
|
|
"calibration/batch_distribution_entropy": 0.9127524752143842,
|
|
"calibration/buffer_distribution_entropy": 0.9590662096299957,
|
|
"calibration/confidence_entropy": 0.40612629682905776,
|
|
"calibration/coverage@0%": 0.004689028864970646,
|
|
"calibration/coverage@1%": 0.004689028864970646,
|
|
"calibration/coverage@10%": 0.010559870352250488,
|
|
"calibration/coverage@15%": 0.18851593077299414,
|
|
"calibration/coverage@20%": 0.29834959026418784,
|
|
"calibration/coverage@25%": 0.5343130809686889,
|
|
"calibration/coverage@30%": 0.6917632399706457,
|
|
"calibration/coverage@5%": 0.004689028864970646,
|
|
"calibration/ece": 0.10851483484359577,
|
|
"calibration/mean_confidence": 0.5313303860375503,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 932.4,
|
|
"completions/max_terminated_length": 527.6,
|
|
"completions/mean_length": 234.701171875,
|
|
"completions/mean_terminated_length": 234.44721069335938,
|
|
"completions/min_length": 108.4,
|
|
"completions/min_terminated_length": 108.4,
|
|
"epoch": 0.704,
|
|
"grad_norm": 0.000645692169200629,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 749147174.0,
|
|
"reward": 1.0034277796745301,
|
|
"reward_std": 0.06270710080862045,
|
|
"rewards/accuracy_reward": 0.53623046875,
|
|
"rewards/brier_reward": 0.8096086740493774,
|
|
"rewards/confidence_uniqueness_reward": 0.9504172205924988,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0027049076044932006,
|
|
"rewards/frontier_coverage_1": 0.15413620173931122,
|
|
"rewards/frontier_coverage_10": 0.15413620173931122,
|
|
"rewards/frontier_coverage_15": 0.15413620173931122,
|
|
"rewards/frontier_coverage_20": 0.12171441465616226,
|
|
"rewards/frontier_coverage_25": 0.08325019925832748,
|
|
"rewards/frontier_coverage_5": 0.15413620173931122,
|
|
"rewards/frontier_ece_reward": 0.006393497437238693,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.072747802734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.0994048297405243,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.703125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0363739013671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0363739013671875,
|
|
"signal/advantage_abs_mean": 0.04672844707965851,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04672844707965851,
|
|
"signal/advantage_pre_scale_std": 0.08910781294107437,
|
|
"signal/advantage_std": 0.08910781294107437,
|
|
"signal/brier_reward/centered_abs_mean": 0.11959185302257538,
|
|
"signal/brier_reward/group_std_mean": 0.15349071621894836,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014948981627821923,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014948981627821923,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022796983271837233,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03028981201350689,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002849622908979654,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002849622908979654,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023922407533973457,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003952773287892342,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.282110749045387e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.282110749045387e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15440512895584108,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19809613525867462,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002763851685449481,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002763851685449481,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15440512895584108,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19809613525867462,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002763851685449481,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002763851685449481,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15440512895584108,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19809613525867462,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002763851685449481,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002763851685449481,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11523250043392182,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14851680397987366,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020626616897061467,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020626616897061467,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07550591826438904,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09685217291116714,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001351555879227817,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001351555879227817,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15440512895584108,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19809613525867462,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002763851685449481,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002763851685449481,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005882252100855112,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007382483780384063,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000735281512606889,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000735281512606889,
|
|
"step": 220
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23271464846876766,
|
|
"calibration/batch_distribution_entropy": 0.9358357262640282,
|
|
"calibration/buffer_distribution_entropy": 0.9581519056963123,
|
|
"calibration/confidence_entropy": 0.4162220078231755,
|
|
"calibration/coverage@0%": 0.072265625,
|
|
"calibration/coverage@1%": 0.084765625,
|
|
"calibration/coverage@10%": 0.253125,
|
|
"calibration/coverage@15%": 0.379296875,
|
|
"calibration/coverage@20%": 0.47265625,
|
|
"calibration/coverage@25%": 0.57265625,
|
|
"calibration/coverage@30%": 0.667578125,
|
|
"calibration/coverage@5%": 0.16796875,
|
|
"calibration/ece": 0.1367582091779871,
|
|
"calibration/mean_confidence": 0.5125693560602549,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 593.6,
|
|
"completions/max_terminated_length": 593.6,
|
|
"completions/mean_length": 237.528125,
|
|
"completions/mean_terminated_length": 237.528125,
|
|
"completions/min_length": 107.8,
|
|
"completions/min_terminated_length": 107.8,
|
|
"epoch": 0.72,
|
|
"grad_norm": 0.0007646158919669688,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 766589318.0,
|
|
"reward": 1.0137495040893554,
|
|
"reward_std": 0.06675339564681053,
|
|
"rewards/accuracy_reward": 0.559375,
|
|
"rewards/brier_reward": 0.8103640794754028,
|
|
"rewards/confidence_uniqueness_reward": 0.9529289245605469,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002305832249112427,
|
|
"rewards/frontier_coverage_1": 0.13652174174785614,
|
|
"rewards/frontier_coverage_10": 0.13652174174785614,
|
|
"rewards/frontier_coverage_15": 0.13516611903905867,
|
|
"rewards/frontier_coverage_20": 0.10529735386371612,
|
|
"rewards/frontier_coverage_25": 0.07399671077728272,
|
|
"rewards/frontier_coverage_5": 0.13652174174785614,
|
|
"rewards/frontier_ece_reward": 0.005852967128157615,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09056396484375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1233248084783554,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045281982421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045281982421875,
|
|
"signal/advantage_abs_mean": 0.04960698410868645,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04960698410868645,
|
|
"signal/advantage_pre_scale_std": 0.09404271245002746,
|
|
"signal/advantage_std": 0.09404271245002746,
|
|
"signal/brier_reward/centered_abs_mean": 0.1129736065864563,
|
|
"signal/brier_reward/group_std_mean": 0.14554286301136016,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014121700823307038,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014121700823307038,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02097158432006836,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.026838266104459763,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002621448040008545,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002621448040008545,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020008538849651814,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003258723812177777,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.581528362701647e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.581528362701647e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15535161793231964,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20127532184123992,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027807938866317274,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027807938866317274,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15535161793231964,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20127532184123992,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027807938866317274,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027807938866317274,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1520615577697754,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19707475900650023,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027219018433243037,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027219018433243037,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10928000509738922,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14226324558258058,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019561121007427573,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019561121007427573,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06993094086647034,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09024198353290558,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001251763803884387,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001251763803884387,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15535161793231964,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20127532184123992,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027807938866317274,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027807938866317274,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005452153272926807,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00692967027425766,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006815191591158509,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006815191591158509,
|
|
"step": 225
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2486706639284412,
|
|
"calibration/batch_distribution_entropy": 0.9398357791188333,
|
|
"calibration/buffer_distribution_entropy": 0.9573366301942062,
|
|
"calibration/confidence_entropy": 0.4341578722653532,
|
|
"calibration/coverage@0%": 0.012109375,
|
|
"calibration/coverage@1%": 0.012109375,
|
|
"calibration/coverage@10%": 0.101953125,
|
|
"calibration/coverage@15%": 0.211328125,
|
|
"calibration/coverage@20%": 0.461328125,
|
|
"calibration/coverage@25%": 0.554296875,
|
|
"calibration/coverage@30%": 0.637890625,
|
|
"calibration/coverage@5%": 0.0421875,
|
|
"calibration/ece": 0.12902223483594827,
|
|
"calibration/mean_confidence": 0.5501059461619251,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 812.6,
|
|
"completions/max_terminated_length": 639.8,
|
|
"completions/mean_length": 241.2818359375,
|
|
"completions/mean_terminated_length": 241.15572814941407,
|
|
"completions/min_length": 108.4,
|
|
"completions/min_terminated_length": 108.4,
|
|
"epoch": 0.736,
|
|
"grad_norm": 0.0005533373332582414,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 783999628.0,
|
|
"reward": 1.0136502385139465,
|
|
"reward_std": 0.06907420158386231,
|
|
"rewards/accuracy_reward": 0.564453125,
|
|
"rewards/brier_reward": 0.8001478791236878,
|
|
"rewards/confidence_uniqueness_reward": 0.9533275842666626,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0023767944891005754,
|
|
"rewards/frontier_coverage_1": 0.12251611799001694,
|
|
"rewards/frontier_coverage_10": 0.12251611799001694,
|
|
"rewards/frontier_coverage_15": 0.12045876532793046,
|
|
"rewards/frontier_coverage_20": 0.09563716128468513,
|
|
"rewards/frontier_coverage_25": 0.06998000591993332,
|
|
"rewards/frontier_coverage_5": 0.12251611799001694,
|
|
"rewards/frontier_ece_reward": 0.0050458677113056185,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0895751953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1197155088186264,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04478759765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04478759765625,
|
|
"signal/advantage_abs_mean": 0.05282713621854782,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05282713621854782,
|
|
"signal/advantage_pre_scale_std": 0.09655779153108597,
|
|
"signal/advantage_std": 0.09655779153108597,
|
|
"signal/brier_reward/centered_abs_mean": 0.11980518698692322,
|
|
"signal/brier_reward/group_std_mean": 0.15468465983867646,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014975648373365402,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014975648373365402,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02088698633015156,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02700975351035595,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002610873291268945,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002610873291268945,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020824840757995844,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003554532490670681,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.727646399056539e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.727646399056539e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15561709105968474,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20363341569900512,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002785545913502574,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002785545913502574,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15561709105968474,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20363341569900512,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002785545913502574,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002785545913502574,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15031374096870423,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19690951704978943,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026906158309429884,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026906158309429884,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10518187284469604,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.13869116008281707,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018827555235475303,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018827555235475303,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06872652918100357,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08967986106872558,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012302048038691283,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012302048038691283,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15561709105968474,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20363341569900512,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002785545913502574,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002785545913502574,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005454682745039463,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006951323244720697,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006818353431299329,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006818353431299329,
|
|
"step": 230
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26921427859740704,
|
|
"calibration/batch_distribution_entropy": 0.9213935076713792,
|
|
"calibration/buffer_distribution_entropy": 0.9566270992945431,
|
|
"calibration/confidence_entropy": 0.4192143462989858,
|
|
"calibration/coverage@0%": 0.00234375,
|
|
"calibration/coverage@1%": 0.00234375,
|
|
"calibration/coverage@10%": 0.1828125,
|
|
"calibration/coverage@15%": 0.2375,
|
|
"calibration/coverage@20%": 0.41640625,
|
|
"calibration/coverage@25%": 0.540625,
|
|
"calibration/coverage@30%": 0.63046875,
|
|
"calibration/coverage@5%": 0.074609375,
|
|
"calibration/ece": 0.11911352214410331,
|
|
"calibration/mean_confidence": 0.46157543829664727,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 591.6,
|
|
"completions/max_terminated_length": 591.6,
|
|
"completions/mean_length": 244.7712890625,
|
|
"completions/mean_terminated_length": 244.7712890625,
|
|
"completions/min_length": 113.4,
|
|
"completions/min_terminated_length": 113.4,
|
|
"epoch": 0.752,
|
|
"grad_norm": 0.0006762135890312493,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 801733286.0,
|
|
"reward": 1.010276234149933,
|
|
"reward_std": 0.06789586842060089,
|
|
"rewards/accuracy_reward": 0.55498046875,
|
|
"rewards/brier_reward": 0.8036497592926025,
|
|
"rewards/confidence_uniqueness_reward": 0.9525466918945312,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0027418298181146384,
|
|
"rewards/frontier_coverage_1": 0.1369162917137146,
|
|
"rewards/frontier_coverage_10": 0.1369162917137146,
|
|
"rewards/frontier_coverage_15": 0.12945626229047774,
|
|
"rewards/frontier_coverage_20": 0.09780407398939132,
|
|
"rewards/frontier_coverage_25": 0.06929152756929398,
|
|
"rewards/frontier_coverage_5": 0.1369162917137146,
|
|
"rewards/frontier_ece_reward": 0.005198706267401576,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.087127685546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.11871586441993713,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0435638427734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0435638427734375,
|
|
"signal/advantage_abs_mean": 0.050392115116119386,
|
|
"signal/advantage_pre_scale_abs_mean": 0.050392115116119386,
|
|
"signal/advantage_pre_scale_std": 0.0956018552184105,
|
|
"signal/advantage_std": 0.0956018552184105,
|
|
"signal/brier_reward/centered_abs_mean": 0.10981836020946503,
|
|
"signal/brier_reward/group_std_mean": 0.14380425959825516,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013727295026183129,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013727295026183129,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020843768119812013,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02636871188879013,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026054710149765016,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026054710149765016,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002333183842711151,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00404226235114038,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.176398906565737e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.176398906565737e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14656142741441727,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19208039343357086,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026234494522213935,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026234494522213935,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14656142741441727,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19208039343357086,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026234494522213935,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026234494522213935,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13771984726190567,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18060652613639833,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002465185197070241,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002465185197070241,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09731692224740982,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12834441363811494,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001741972891613841,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001741972891613841,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06323997154831887,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08284124583005906,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011319954413920642,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011319954413920642,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14656142741441727,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19208039343357086,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026234494522213935,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026234494522213935,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0052522880025207995,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006774683482944965,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006565360003150999,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006565360003150999,
|
|
"step": 235
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25391016797868754,
|
|
"calibration/batch_distribution_entropy": 0.9626844496029806,
|
|
"calibration/buffer_distribution_entropy": 0.9558027162029411,
|
|
"calibration/confidence_entropy": 0.4521196567683129,
|
|
"calibration/coverage@0%": 0.0632911876223092,
|
|
"calibration/coverage@1%": 0.0707130626223092,
|
|
"calibration/coverage@10%": 0.2926109955968689,
|
|
"calibration/coverage@15%": 0.3672333659491194,
|
|
"calibration/coverage@20%": 0.42779705846379645,
|
|
"calibration/coverage@25%": 0.4707826259784736,
|
|
"calibration/coverage@30%": 0.5833430161448141,
|
|
"calibration/coverage@5%": 0.21447070694716244,
|
|
"calibration/ece": 0.16563868062743387,
|
|
"calibration/mean_confidence": 0.48634583285162664,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1148.4,
|
|
"completions/max_terminated_length": 619.6,
|
|
"completions/mean_length": 250.61728515625,
|
|
"completions/mean_terminated_length": 250.24109802246093,
|
|
"completions/min_length": 117.0,
|
|
"completions/min_terminated_length": 117.0,
|
|
"epoch": 0.768,
|
|
"grad_norm": 0.0008496443624608219,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 819232311.0,
|
|
"reward": 0.9929238677024841,
|
|
"reward_std": 0.07314693182706833,
|
|
"rewards/accuracy_reward": 0.51201171875,
|
|
"rewards/brier_reward": 0.8128708124160766,
|
|
"rewards/confidence_uniqueness_reward": 0.9537018656730651,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0023616855032742023,
|
|
"rewards/frontier_coverage_1": 0.16877596378326415,
|
|
"rewards/frontier_coverage_10": 0.16877596378326415,
|
|
"rewards/frontier_coverage_15": 0.16228313446044923,
|
|
"rewards/frontier_coverage_20": 0.11836232990026474,
|
|
"rewards/frontier_coverage_25": 0.08216822892427444,
|
|
"rewards/frontier_coverage_5": 0.16877596378326415,
|
|
"rewards/frontier_ece_reward": 0.0058203617110848425,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.091363525390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.12203695029020309,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0456817626953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0456817626953125,
|
|
"signal/advantage_abs_mean": 0.05535215809941292,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05535215809941292,
|
|
"signal/advantage_pre_scale_std": 0.10177362710237503,
|
|
"signal/advantage_std": 0.10177362710237503,
|
|
"signal/brier_reward/centered_abs_mean": 0.11626919358968735,
|
|
"signal/brier_reward/group_std_mean": 0.14906791150569915,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014533649198710918,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014533649198710918,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020532801002264022,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.027031725272536278,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025666001252830028,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025666001252830028,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019179133465513586,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032503914553672075,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4330648122704585e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4330648122704585e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1560559540987015,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20065269768238067,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027934013400226832,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027934013400226832,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1560559540987015,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20065269768238067,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027934013400226832,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027934013400226832,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14529191553592682,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18685760498046874,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026007251348346473,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026007251348346473,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10000549256801605,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12909981608390808,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017900982638821006,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017900982638821006,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06673097908496857,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08542303293943405,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011944844853132963,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011944844853132963,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1560559540987015,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20065269768238067,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027934013400226832,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027934013400226832,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0050937430001795295,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00648985980078578,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006367178750224412,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006367178750224412,
|
|
"step": 240
|
|
},
|
|
{
|
|
"calibration/aurc": 0.33023066649008004,
|
|
"calibration/batch_distribution_entropy": 0.9211474546608838,
|
|
"calibration/buffer_distribution_entropy": 0.9545807950532765,
|
|
"calibration/confidence_entropy": 0.4223679881144581,
|
|
"calibration/coverage@0%": 0.00625,
|
|
"calibration/coverage@1%": 0.00625,
|
|
"calibration/coverage@10%": 0.14375,
|
|
"calibration/coverage@15%": 0.23984375,
|
|
"calibration/coverage@20%": 0.298046875,
|
|
"calibration/coverage@25%": 0.373828125,
|
|
"calibration/coverage@30%": 0.425,
|
|
"calibration/coverage@5%": 0.109375,
|
|
"calibration/ece": 0.16420909419043678,
|
|
"calibration/mean_confidence": 0.5384653354060752,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 749.0,
|
|
"completions/max_terminated_length": 749.0,
|
|
"completions/mean_length": 252.1546875,
|
|
"completions/mean_terminated_length": 252.1546875,
|
|
"completions/min_length": 122.2,
|
|
"completions/min_terminated_length": 122.2,
|
|
"epoch": 0.784,
|
|
"grad_norm": 0.0006861758301965892,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 836988743.0,
|
|
"reward": 1.0032026648521424,
|
|
"reward_std": 0.07250990495085716,
|
|
"rewards/accuracy_reward": 0.54912109375,
|
|
"rewards/brier_reward": 0.7853811979293823,
|
|
"rewards/confidence_uniqueness_reward": 0.9548965454101562,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002936554979532957,
|
|
"rewards/frontier_coverage_1": 0.115447399020195,
|
|
"rewards/frontier_coverage_10": 0.11522519886493683,
|
|
"rewards/frontier_coverage_15": 0.1076996922492981,
|
|
"rewards/frontier_coverage_20": 0.08056422024965286,
|
|
"rewards/frontier_coverage_25": 0.06024104133248329,
|
|
"rewards/frontier_coverage_5": 0.115447399020195,
|
|
"rewards/frontier_ece_reward": 0.004129563085734844,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.097381591796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12832460254430772,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0486907958984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0486907958984375,
|
|
"signal/advantage_abs_mean": 0.05545818880200386,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05545818880200386,
|
|
"signal/advantage_pre_scale_std": 0.10165861696004867,
|
|
"signal/advantage_std": 0.10165861696004867,
|
|
"signal/brier_reward/centered_abs_mean": 0.11744992583990096,
|
|
"signal/brier_reward/group_std_mean": 0.15015294551849365,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01468124072998762,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01468124072998762,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019893622398376463,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.025294922292232513,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002486702799797058,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002486702799797058,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024152032332494856,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003975289314985276,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.323213652241975e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.323213652241975e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15085219144821166,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19441507160663604,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027002541813999415,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027002541813999415,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15072461664676667,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1942601442337036,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026979705318808554,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026979705318808554,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1394365519285202,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1801184743642807,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002495914185419679,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002495914185419679,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09248919636011124,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1202873170375824,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016555566107854247,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016555566107854247,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06352094933390617,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08175122737884521,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011370248859748245,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011370248859748245,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15085219144821166,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19441507160663604,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027002541813999415,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027002541813999415,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005043382756412029,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006396861933171749,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006304228445515037,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006304228445515037,
|
|
"step": 245
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19597532611426857,
|
|
"calibration/batch_distribution_entropy": 0.9391014856904241,
|
|
"calibration/buffer_distribution_entropy": 0.9539163467672903,
|
|
"calibration/confidence_entropy": 0.43941391694571175,
|
|
"calibration/coverage@0%": 0.0609375,
|
|
"calibration/coverage@1%": 0.0609375,
|
|
"calibration/coverage@10%": 0.248828125,
|
|
"calibration/coverage@15%": 0.488671875,
|
|
"calibration/coverage@20%": 0.6046875,
|
|
"calibration/coverage@25%": 0.673828125,
|
|
"calibration/coverage@30%": 0.757421875,
|
|
"calibration/coverage@5%": 0.13203125,
|
|
"calibration/ece": 0.10801228966346155,
|
|
"calibration/mean_confidence": 0.5153138521634614,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 736.8,
|
|
"completions/max_terminated_length": 526.2,
|
|
"completions/mean_length": 252.43125,
|
|
"completions/mean_terminated_length": 252.30570373535156,
|
|
"completions/min_length": 123.6,
|
|
"completions/min_terminated_length": 123.6,
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.0007989571313373744,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 854584199.0,
|
|
"reward": 1.0239898920059205,
|
|
"reward_std": 0.06917952895164489,
|
|
"rewards/accuracy_reward": 0.580078125,
|
|
"rewards/brier_reward": 0.8201135277748108,
|
|
"rewards/confidence_uniqueness_reward": 0.9562228918075562,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0026165119837969542,
|
|
"rewards/frontier_coverage_1": 0.12336181104183197,
|
|
"rewards/frontier_coverage_10": 0.12248000055551529,
|
|
"rewards/frontier_coverage_15": 0.11451495438814163,
|
|
"rewards/frontier_coverage_20": 0.08619352877140045,
|
|
"rewards/frontier_coverage_25": 0.06512454897165298,
|
|
"rewards/frontier_coverage_5": 0.12336181104183197,
|
|
"rewards/frontier_ece_reward": 0.005098512535914779,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.089453125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12056980878114701,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0447265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0447265625,
|
|
"signal/advantage_abs_mean": 0.05227528065443039,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05227528065443039,
|
|
"signal/advantage_pre_scale_std": 0.10132318586111069,
|
|
"signal/advantage_std": 0.10132318586111069,
|
|
"signal/brier_reward/centered_abs_mean": 0.10401596128940582,
|
|
"signal/brier_reward/group_std_mean": 0.1339889034628868,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013001995161175728,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013001995161175728,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01882171183824539,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.023961442708969116,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002352713979780674,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002352713979780674,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020810413639992475,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032477714121341705,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.725063943420537e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.725063943420537e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1313213363289833,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1710223823785782,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002350651752203703,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002350651752203703,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1297929286956787,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16902453005313872,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002323293359950185,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002323293359950185,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11685995012521744,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15256010591983796,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020917929941788316,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020917929941788316,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08047119081020356,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10576021820306777,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014404343208298087,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014404343208298087,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.054990262538194654,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07126960307359695,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000984325702302158,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000984325702302158,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1313213363289833,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1710223823785782,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002350651752203703,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002350651752203703,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004696264863014221,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005948188435286284,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005870331078767776,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005870331078767776,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"eval_calibration/aurc": 0.4142328683970304,
|
|
"eval_calibration/batch_distribution_entropy": 0.8993784848821733,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9542638871252693,
|
|
"eval_calibration/confidence_entropy": 0.4208433244117349,
|
|
"eval_calibration/coverage@0%": 0.078125,
|
|
"eval_calibration/coverage@1%": 0.078125,
|
|
"eval_calibration/coverage@10%": 0.078125,
|
|
"eval_calibration/coverage@15%": 0.1015625,
|
|
"eval_calibration/coverage@20%": 0.203125,
|
|
"eval_calibration/coverage@25%": 0.296875,
|
|
"eval_calibration/coverage@30%": 0.3125,
|
|
"eval_calibration/coverage@5%": 0.078125,
|
|
"eval_calibration/ece": 0.1930786248852066,
|
|
"eval_calibration/mean_confidence": 0.4963598748852066,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 476.0,
|
|
"eval_completions/max_terminated_length": 476.0,
|
|
"eval_completions/mean_length": 255.57765197753906,
|
|
"eval_completions/mean_terminated_length": 255.57765197753906,
|
|
"eval_completions/min_length": 135.75,
|
|
"eval_completions/min_terminated_length": 135.75,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 854584199.0,
|
|
"eval_reward": 0.9532016068696976,
|
|
"eval_reward_std": 0.23495937138795853,
|
|
"eval_rewards/accuracy_reward": 0.4453125,
|
|
"eval_rewards/brier_reward": 0.7989254742860794,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.901123046875,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0034314218210056424,
|
|
"eval_rewards/frontier_coverage_1": 0.1996590532362461,
|
|
"eval_rewards/frontier_coverage_10": 0.19675859063863754,
|
|
"eval_rewards/frontier_coverage_15": 0.1782199591398239,
|
|
"eval_rewards/frontier_coverage_20": 0.12300213798880577,
|
|
"eval_rewards/frontier_coverage_25": 0.07886525429785252,
|
|
"eval_rewards/frontier_coverage_5": 0.1996590532362461,
|
|
"eval_rewards/frontier_ece_reward": 0.005018939729779959,
|
|
"eval_runtime": 23.4926,
|
|
"eval_samples_per_second": 21.283,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4755859375,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4951448142528534,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23779296875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23779296875,
|
|
"eval_signal/advantage_abs_mean": 0.21880921721458435,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21880921721458435,
|
|
"eval_signal/advantage_pre_scale_std": 0.2323761023581028,
|
|
"eval_signal/advantage_std": 0.2323761023581028,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.20660649985074997,
|
|
"eval_signal/brier_reward/group_std_mean": 0.25988300889730453,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025825812481343746,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.025825812481343746,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0436553955078125,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05071157868951559,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0054569244384765625,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0054569244384765625,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004325759713537991,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008416089694947004,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.743109745206311e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.743109745206311e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3520267680287361,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.43477170169353485,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006301278946921229,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006301278946921229,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.34786005318164825,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.42980340868234634,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00622669467702508,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00622669467702508,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3130974769592285,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.38755345344543457,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0056044444208964705,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0056044444208964705,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.20560914278030396,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.26048335433006287,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003680403344333172,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003680403344333172,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.11794870160520077,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.15224769711494446,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002111281646648422,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002111281646648422,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3520267680287361,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.43477170169353485,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006301278946921229,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006301278946921229,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.007750884513370693,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.00966967479325831,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009688605641713366,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009688605641713366,
|
|
"eval_steps_per_second": 0.17,
|
|
"step": 250
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22341894121568937,
|
|
"calibration/batch_distribution_entropy": 0.8868936837741197,
|
|
"calibration/buffer_distribution_entropy": 0.9534030407270027,
|
|
"calibration/confidence_entropy": 0.40355038966953816,
|
|
"calibration/coverage@0%": 0.0078125,
|
|
"calibration/coverage@1%": 0.0078125,
|
|
"calibration/coverage@10%": 0.158203125,
|
|
"calibration/coverage@15%": 0.26796875,
|
|
"calibration/coverage@20%": 0.391015625,
|
|
"calibration/coverage@25%": 0.720703125,
|
|
"calibration/coverage@30%": 0.805859375,
|
|
"calibration/coverage@5%": 0.109375,
|
|
"calibration/ece": 0.13161625000000002,
|
|
"calibration/mean_confidence": 0.567055625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 572.6,
|
|
"completions/max_terminated_length": 572.6,
|
|
"completions/mean_length": 250.9498046875,
|
|
"completions/mean_terminated_length": 250.9498046875,
|
|
"completions/min_length": 119.0,
|
|
"completions/min_terminated_length": 119.0,
|
|
"epoch": 0.816,
|
|
"grad_norm": 0.0008308417163789272,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 872253093.0,
|
|
"reward": 1.0168833494186402,
|
|
"reward_std": 0.07324363887310029,
|
|
"rewards/accuracy_reward": 0.58017578125,
|
|
"rewards/brier_reward": 0.7875685572624207,
|
|
"rewards/confidence_uniqueness_reward": 0.9539688110351563,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0030643716920167206,
|
|
"rewards/frontier_coverage_1": 0.09161564782261848,
|
|
"rewards/frontier_coverage_10": 0.09113903939723969,
|
|
"rewards/frontier_coverage_15": 0.08651385009288788,
|
|
"rewards/frontier_coverage_20": 0.06792352050542831,
|
|
"rewards/frontier_coverage_25": 0.054780172556638716,
|
|
"rewards/frontier_coverage_5": 0.09161564782261848,
|
|
"rewards/frontier_ece_reward": 0.0040153548121452335,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.096136474609375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12627903670072554,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0480682373046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0480682373046875,
|
|
"signal/advantage_abs_mean": 0.05611480250954628,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05611480250954628,
|
|
"signal/advantage_pre_scale_std": 0.10440016239881515,
|
|
"signal/advantage_std": 0.10440016239881515,
|
|
"signal/brier_reward/centered_abs_mean": 0.1229382187128067,
|
|
"signal/brier_reward/group_std_mean": 0.15722771883010864,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015367277339100838,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015367277339100838,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021144795417785644,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.027067623659968378,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026430994272232055,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026430994272232055,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00280195998493582,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004848680645227432,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.015508249925915e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.015508249925915e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14679521322250366,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1901654928922653,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026276342570781706,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026276342570781706,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1450937107205391,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1880299925804138,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025971772614866496,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025971772614866496,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1285434916615486,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16707846224308015,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002300928346812725,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002300928346812725,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0890080213546753,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11617460995912551,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015932435402646662,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015932435402646662,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06208330765366554,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07998019456863403,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011112912092357875,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011112912092357875,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14679521322250366,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1901654928922653,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026276342570781706,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026276342570781706,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004793836083263159,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006085961498320103,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005992295104078948,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005992295104078948,
|
|
"step": 255
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2786088570549718,
|
|
"calibration/batch_distribution_entropy": 0.938188176116842,
|
|
"calibration/buffer_distribution_entropy": 0.9518384077690992,
|
|
"calibration/confidence_entropy": 0.43254885404867444,
|
|
"calibration/coverage@0%": 0.037890625,
|
|
"calibration/coverage@1%": 0.037890625,
|
|
"calibration/coverage@10%": 0.238671875,
|
|
"calibration/coverage@15%": 0.308203125,
|
|
"calibration/coverage@20%": 0.358984375,
|
|
"calibration/coverage@25%": 0.414453125,
|
|
"calibration/coverage@30%": 0.515234375,
|
|
"calibration/coverage@5%": 0.17109375,
|
|
"calibration/ece": 0.11687695312500002,
|
|
"calibration/mean_confidence": 0.506845703125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 742.0,
|
|
"completions/max_terminated_length": 536.0,
|
|
"completions/mean_length": 252.67412109375,
|
|
"completions/mean_terminated_length": 252.54882202148437,
|
|
"completions/min_length": 115.6,
|
|
"completions/min_terminated_length": 115.6,
|
|
"epoch": 0.832,
|
|
"grad_norm": 0.0008032754994928837,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 889848828.0,
|
|
"reward": 1.0108869075775146,
|
|
"reward_std": 0.0699038602411747,
|
|
"rewards/accuracy_reward": 0.550390625,
|
|
"rewards/brier_reward": 0.8212600588798523,
|
|
"rewards/confidence_uniqueness_reward": 0.9529539108276367,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002241781633347273,
|
|
"rewards/frontier_coverage_1": 0.14855314195156097,
|
|
"rewards/frontier_coverage_10": 0.14673392921686174,
|
|
"rewards/frontier_coverage_15": 0.12972736209630967,
|
|
"rewards/frontier_coverage_20": 0.09893926084041596,
|
|
"rewards/frontier_coverage_25": 0.07374034821987152,
|
|
"rewards/frontier_coverage_5": 0.14855314195156097,
|
|
"rewards/frontier_ece_reward": 0.005167901515960693,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0928466796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12302683144807816,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04642333984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04642333984375,
|
|
"signal/advantage_abs_mean": 0.053195972740650174,
|
|
"signal/advantage_pre_scale_abs_mean": 0.053195972740650174,
|
|
"signal/advantage_pre_scale_std": 0.10306639075279236,
|
|
"signal/advantage_std": 0.10306639075279236,
|
|
"signal/brier_reward/centered_abs_mean": 0.10160237550735474,
|
|
"signal/brier_reward/group_std_mean": 0.13150315284729003,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012700296938419342,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012700296938419342,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02115917094051838,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02714742161333561,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026448963675647975,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026448963675647975,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018434126162901522,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0030642326921224592,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.299708623671904e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.299708623671904e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13409744948148727,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17334451973438264,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002400344191119075,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002400344191119075,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13227225542068483,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1710406243801117,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023676733020693065,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023676733020693065,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11501559019088745,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1489970475435257,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002058779005892575,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002058779005892575,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08325291574001312,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10779815167188644,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014902271097525955,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014902271097525955,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0572903573513031,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07322717756032944,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010254973545670508,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010254973545670508,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13409744948148727,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17334451973438264,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002400344191119075,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002400344191119075,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004283274430781603,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005466235801577568,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005354093038477004,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005354093038477004,
|
|
"step": 260
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31810909498840123,
|
|
"calibration/batch_distribution_entropy": 0.9300493723481166,
|
|
"calibration/buffer_distribution_entropy": 0.9525486135022445,
|
|
"calibration/confidence_entropy": 0.4521314722308912,
|
|
"calibration/coverage@0%": 0.021484375,
|
|
"calibration/coverage@1%": 0.021484375,
|
|
"calibration/coverage@10%": 0.18203125,
|
|
"calibration/coverage@15%": 0.271484375,
|
|
"calibration/coverage@20%": 0.4078125,
|
|
"calibration/coverage@25%": 0.471484375,
|
|
"calibration/coverage@30%": 0.544921875,
|
|
"calibration/coverage@5%": 0.1015625,
|
|
"calibration/ece": 0.13904648437499995,
|
|
"calibration/mean_confidence": 0.572958203125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 669.0,
|
|
"completions/max_terminated_length": 669.0,
|
|
"completions/mean_length": 250.8658203125,
|
|
"completions/mean_terminated_length": 250.8658203125,
|
|
"completions/min_length": 126.6,
|
|
"completions/min_terminated_length": 126.6,
|
|
"epoch": 0.848,
|
|
"grad_norm": 0.000901456514839083,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 907432062.0,
|
|
"reward": 0.9951120018959045,
|
|
"reward_std": 0.06787320524454117,
|
|
"rewards/accuracy_reward": 0.5255859375,
|
|
"rewards/brier_reward": 0.7996316909790039,
|
|
"rewards/confidence_uniqueness_reward": 0.9561546325683594,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0026320197619497778,
|
|
"rewards/frontier_coverage_1": 0.13928882628679276,
|
|
"rewards/frontier_coverage_10": 0.1381850004196167,
|
|
"rewards/frontier_coverage_15": 0.11995747685432434,
|
|
"rewards/frontier_coverage_20": 0.08737777099013329,
|
|
"rewards/frontier_coverage_25": 0.06390020698308944,
|
|
"rewards/frontier_coverage_5": 0.13928882628679276,
|
|
"rewards/frontier_ece_reward": 0.004621562361717224,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08026123046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.10938422381877899,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.675,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.040130615234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.040130615234375,
|
|
"signal/advantage_abs_mean": 0.0514536626636982,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0514536626636982,
|
|
"signal/advantage_pre_scale_std": 0.09874935895204544,
|
|
"signal/advantage_std": 0.09874935895204544,
|
|
"signal/brier_reward/centered_abs_mean": 0.10689050555229188,
|
|
"signal/brier_reward/group_std_mean": 0.13864734917879104,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013361313194036484,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013361313194036484,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019451355934143065,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.024612750858068466,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002431419491767883,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002431419491767883,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002004986209794879,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003225843422114849,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.588925173971802e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.588925173971802e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13043643683195114,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1731602132320404,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023348120506852866,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023348120506852866,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12900474667549133,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17132607698440552,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023091848473995925,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023091848473995925,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11192914545536041,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1488051563501358,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002003531623631716,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002003531623631716,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0816087007522583,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10843551754951478,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014607956632971763,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014607956632971763,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05602134019136429,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07327790409326554,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010027819662354887,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010027819662354887,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13043643683195114,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1731602132320404,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023348120506852866,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023348120506852866,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00407783156260848,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005348461586982012,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00050972894532606,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00050972894532606,
|
|
"step": 265
|
|
},
|
|
{
|
|
"calibration/aurc": 0.271196127415333,
|
|
"calibration/batch_distribution_entropy": 0.9267513408952057,
|
|
"calibration/buffer_distribution_entropy": 0.953885675345784,
|
|
"calibration/confidence_entropy": 0.4640439064202364,
|
|
"calibration/coverage@0%": 0.001953125,
|
|
"calibration/coverage@1%": 0.001953125,
|
|
"calibration/coverage@10%": 0.14609375,
|
|
"calibration/coverage@15%": 0.1921875,
|
|
"calibration/coverage@20%": 0.3625,
|
|
"calibration/coverage@25%": 0.43515625,
|
|
"calibration/coverage@30%": 0.49765625,
|
|
"calibration/coverage@5%": 0.069921875,
|
|
"calibration/ece": 0.14137528043337522,
|
|
"calibration/mean_confidence": 0.618042606722683,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1146.8,
|
|
"completions/max_terminated_length": 691.6,
|
|
"completions/mean_length": 253.50556640625,
|
|
"completions/mean_terminated_length": 253.00428466796876,
|
|
"completions/min_length": 124.0,
|
|
"completions/min_terminated_length": 124.0,
|
|
"epoch": 0.864,
|
|
"grad_norm": 0.0008237656438723207,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 925014775.0,
|
|
"reward": 1.0212122201919556,
|
|
"reward_std": 0.07169701382517815,
|
|
"rewards/accuracy_reward": 0.58408203125,
|
|
"rewards/brier_reward": 0.8013546705245972,
|
|
"rewards/confidence_uniqueness_reward": 0.95478835105896,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0022963778115808963,
|
|
"rewards/frontier_coverage_1": 0.10232354998588562,
|
|
"rewards/frontier_coverage_10": 0.1015251636505127,
|
|
"rewards/frontier_coverage_15": 0.09124607294797897,
|
|
"rewards/frontier_coverage_20": 0.07141445800662041,
|
|
"rewards/frontier_coverage_25": 0.056721173226833344,
|
|
"rewards/frontier_coverage_5": 0.10232354998588562,
|
|
"rewards/frontier_ece_reward": 0.003858886519446969,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.092730712890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.12438704073429108,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.64375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0463653564453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0463653564453125,
|
|
"signal/advantage_abs_mean": 0.05372531041502952,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05372531041502952,
|
|
"signal/advantage_pre_scale_std": 0.10223406553268433,
|
|
"signal/advantage_std": 0.10223406553268433,
|
|
"signal/brier_reward/centered_abs_mean": 0.11028100401163102,
|
|
"signal/brier_reward/group_std_mean": 0.14314747452735901,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013785125501453877,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013785125501453877,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019714291021227837,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.025689712166786192,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024642863776534797,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024642863776534797,
|
|
"signal/format_reward/centered_abs_mean": 0.00074462890625,
|
|
"signal/format_reward/group_std_mean": 0.0018734002020210027,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019337810343131423,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032589809503406285,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.461468186287675e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.461468186287675e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14040221869945527,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18402603268623352,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025131995789706707,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025131995789706707,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.139146026968956,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18235517740249635,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024907137267291546,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024907137267291546,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11923650801181793,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15632621049880982,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021343334345147015,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021343334345147015,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08577980250120162,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11253109723329544,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001535458443686366,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001535458443686366,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05949989929795265,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07727274596691132,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010650481563061476,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010650481563061476,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14040221869945527,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18402603268623352,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025131995789706707,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025131995789706707,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004180350759997964,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0054055553860962394,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005225438449997455,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005225438449997455,
|
|
"step": 270
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3662187271340662,
|
|
"calibration/batch_distribution_entropy": 0.9424121541193221,
|
|
"calibration/buffer_distribution_entropy": 0.9544058121376885,
|
|
"calibration/confidence_entropy": 0.4228504678202111,
|
|
"calibration/coverage@0%": 0.008984375,
|
|
"calibration/coverage@1%": 0.008984375,
|
|
"calibration/coverage@10%": 0.045703125,
|
|
"calibration/coverage@15%": 0.075390625,
|
|
"calibration/coverage@20%": 0.130859375,
|
|
"calibration/coverage@25%": 0.232421875,
|
|
"calibration/coverage@30%": 0.31875,
|
|
"calibration/coverage@5%": 0.0234375,
|
|
"calibration/ece": 0.15656218074772288,
|
|
"calibration/mean_confidence": 0.5140074932477229,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 560.2,
|
|
"completions/max_terminated_length": 560.2,
|
|
"completions/mean_length": 251.0052734375,
|
|
"completions/mean_terminated_length": 251.0052734375,
|
|
"completions/min_length": 121.8,
|
|
"completions/min_terminated_length": 121.8,
|
|
"epoch": 0.88,
|
|
"grad_norm": 0.0007305578328669071,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 942732141.0,
|
|
"reward": 0.9916547417640686,
|
|
"reward_std": 0.06835338175296783,
|
|
"rewards/accuracy_reward": 0.518359375,
|
|
"rewards/brier_reward": 0.7975268721580505,
|
|
"rewards/confidence_uniqueness_reward": 0.9533485412597656,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002605132572352886,
|
|
"rewards/frontier_coverage_1": 0.1489662915468216,
|
|
"rewards/frontier_coverage_10": 0.14885261952877044,
|
|
"rewards/frontier_coverage_15": 0.12518833130598067,
|
|
"rewards/frontier_coverage_20": 0.09161524027585984,
|
|
"rewards/frontier_coverage_25": 0.06737890988588333,
|
|
"rewards/frontier_coverage_5": 0.1489662915468216,
|
|
"rewards/frontier_ece_reward": 0.004623535228893161,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0923828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11860855221748352,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.675,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04619140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04619140625,
|
|
"signal/advantage_abs_mean": 0.05325910523533821,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05325910523533821,
|
|
"signal/advantage_pre_scale_std": 0.09945199489593506,
|
|
"signal/advantage_std": 0.09945199489593506,
|
|
"signal/brier_reward/centered_abs_mean": 0.11102102249860764,
|
|
"signal/brier_reward/group_std_mean": 0.14243731796741485,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013877627812325955,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013877627812325955,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020394396781921387,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.025670462101697922,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025492995977401734,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025492995977401734,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020469398470595477,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0033199348486959933,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.664021860458888e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.664021860458888e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15008485019207002,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19261721670627593,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026865187101066113,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026865187101066113,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14985155463218688,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19233030080795288,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002682342706248164,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002682342706248164,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12626577615737916,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16239021718502045,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002260157372802496,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002260157372802496,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09228641092777252,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11892776638269424,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016519267112016678,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016519267112016678,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.062046286463737485,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07945701777935028,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011106284568086267,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011106284568086267,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15008485019207002,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19261721670627593,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026865187101066113,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026865187101066113,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004392636381089688,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0055966474115848545,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000549079547636211,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000549079547636211,
|
|
"step": 275
|
|
},
|
|
{
|
|
"calibration/aurc": 0.37704128995461206,
|
|
"calibration/batch_distribution_entropy": 0.9144648554628553,
|
|
"calibration/buffer_distribution_entropy": 0.9541376919449341,
|
|
"calibration/confidence_entropy": 0.4211639999263033,
|
|
"calibration/coverage@0%": 0.009765625,
|
|
"calibration/coverage@1%": 0.009765625,
|
|
"calibration/coverage@10%": 0.076171875,
|
|
"calibration/coverage@15%": 0.139453125,
|
|
"calibration/coverage@20%": 0.18125,
|
|
"calibration/coverage@25%": 0.23515625,
|
|
"calibration/coverage@30%": 0.42109375,
|
|
"calibration/coverage@5%": 0.04140625,
|
|
"calibration/ece": 0.16247158693895747,
|
|
"calibration/mean_confidence": 0.5423721630610425,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 660.4,
|
|
"completions/max_terminated_length": 660.4,
|
|
"completions/mean_length": 249.89990234375,
|
|
"completions/mean_terminated_length": 249.89990234375,
|
|
"completions/min_length": 127.6,
|
|
"completions/min_terminated_length": 127.6,
|
|
"epoch": 0.896,
|
|
"grad_norm": 0.0009292135946452618,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 960401964.0,
|
|
"reward": 1.0014619946479797,
|
|
"reward_std": 0.06392379850149155,
|
|
"rewards/accuracy_reward": 0.53779296875,
|
|
"rewards/brier_reward": 0.8009364008903503,
|
|
"rewards/confidence_uniqueness_reward": 0.9536941528320313,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0029405945912003516,
|
|
"rewards/frontier_coverage_1": 0.1434343695640564,
|
|
"rewards/frontier_coverage_10": 0.14290477931499482,
|
|
"rewards/frontier_coverage_15": 0.12304576188325882,
|
|
"rewards/frontier_coverage_20": 0.09274870157241821,
|
|
"rewards/frontier_coverage_25": 0.0677462287247181,
|
|
"rewards/frontier_coverage_5": 0.1434343695640564,
|
|
"rewards/frontier_ece_reward": 0.004168036207556724,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.084637451171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.11610205471515656,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0423187255859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0423187255859375,
|
|
"signal/advantage_abs_mean": 0.047319182008504865,
|
|
"signal/advantage_pre_scale_abs_mean": 0.047319182008504865,
|
|
"signal/advantage_pre_scale_std": 0.09262912273406983,
|
|
"signal/advantage_std": 0.09262912273406983,
|
|
"signal/brier_reward/centered_abs_mean": 0.10917495787143708,
|
|
"signal/brier_reward/group_std_mean": 0.1393113523721695,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013646869733929635,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013646869733929635,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019398140907287597,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02443733625113964,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024247676134109496,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024247676134109496,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022043085657060145,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035537141375243664,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.945712269342039e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.945712269342039e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1438123404979706,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18452912867069243,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002574240742251277,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002574240742251277,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1431819975376129,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1837300330400467,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002562957629561424,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002562957629561424,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12040194422006607,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15485568046569825,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021551947575062513,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021551947575062513,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08914662450551987,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11499444544315338,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015957244904711843,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015957244904711843,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.060103370994329455,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07683221846818925,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010758502641692758,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010758502641692758,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1438123404979706,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18452912867069243,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002574240742251277,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002574240742251277,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004060229659080506,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005199447367340326,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005075287073850632,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005075287073850632,
|
|
"step": 280
|
|
},
|
|
{
|
|
"calibration/aurc": 0.36122752635898076,
|
|
"calibration/batch_distribution_entropy": 0.946771030985343,
|
|
"calibration/buffer_distribution_entropy": 0.9530327177612656,
|
|
"calibration/confidence_entropy": 0.44074749209441855,
|
|
"calibration/coverage@0%": 0.030124080882352945,
|
|
"calibration/coverage@1%": 0.030124080882352945,
|
|
"calibration/coverage@10%": 0.11731311274509804,
|
|
"calibration/coverage@15%": 0.20526654411764705,
|
|
"calibration/coverage@20%": 0.31317248774509804,
|
|
"calibration/coverage@25%": 0.37570925245098036,
|
|
"calibration/coverage@30%": 0.46482536764705884,
|
|
"calibration/coverage@5%": 0.05438419117647059,
|
|
"calibration/ece": 0.14812835716976971,
|
|
"calibration/mean_confidence": 0.48950283305724307,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 782.2,
|
|
"completions/max_terminated_length": 605.4,
|
|
"completions/mean_length": 249.14404296875,
|
|
"completions/mean_terminated_length": 248.89211730957032,
|
|
"completions/min_length": 122.2,
|
|
"completions/min_terminated_length": 122.2,
|
|
"epoch": 0.912,
|
|
"grad_norm": 0.0009797021048143506,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 978004495.0,
|
|
"reward": 1.0036668181419373,
|
|
"reward_std": 0.06746198162436486,
|
|
"rewards/accuracy_reward": 0.5416015625,
|
|
"rewards/brier_reward": 0.8046979784965516,
|
|
"rewards/confidence_uniqueness_reward": 0.9551046133041382,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002108183712698519,
|
|
"rewards/frontier_coverage_1": 0.13762879073619844,
|
|
"rewards/frontier_coverage_10": 0.13762879073619844,
|
|
"rewards/frontier_coverage_15": 0.12159725055098533,
|
|
"rewards/frontier_coverage_20": 0.09403416961431503,
|
|
"rewards/frontier_coverage_25": 0.0696952298283577,
|
|
"rewards/frontier_coverage_5": 0.13762879073619844,
|
|
"rewards/frontier_ece_reward": 0.004224732192233205,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08880615234375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11999956220388412,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044403076171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044403076171875,
|
|
"signal/advantage_abs_mean": 0.05088106840848923,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05088106840848923,
|
|
"signal/advantage_pre_scale_std": 0.09543706178665161,
|
|
"signal/advantage_std": 0.09543706178665161,
|
|
"signal/brier_reward/centered_abs_mean": 0.1156904086470604,
|
|
"signal/brier_reward/group_std_mean": 0.14857376515865325,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01446130108088255,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01446130108088255,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01911727674305439,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02458658292889595,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002389659592881799,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002389659592881799,
|
|
"signal/format_reward/centered_abs_mean": 0.0003662109375,
|
|
"signal/format_reward/group_std_mean": 0.000768545875325799,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00018310546875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016015514265745878,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0026247325353324414,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8667769583989866e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8667769583989866e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15861463844776152,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20468551516532899,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028392020147293808,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028392020147293808,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15861463844776152,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20468551516532899,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028392020147293808,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028392020147293808,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13538606017827987,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17446688711643218,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002423410303890705,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002423410303890705,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09944360852241516,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12812657803297042,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017800404457375407,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017800404457375407,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06665683835744858,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08519981652498246,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001193157327361405,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001193157327361405,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15861463844776152,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20468551516532899,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028392020147293808,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028392020147293808,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004177849320694804,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005369494389742613,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005222311650868505,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005222311650868505,
|
|
"step": 285
|
|
},
|
|
{
|
|
"calibration/aurc": 0.37730223006107255,
|
|
"calibration/batch_distribution_entropy": 0.9507460509748444,
|
|
"calibration/buffer_distribution_entropy": 0.9543901529907087,
|
|
"calibration/confidence_entropy": 0.45204149685146594,
|
|
"calibration/coverage@0%": 0.011721813725490197,
|
|
"calibration/coverage@1%": 0.011721813725490197,
|
|
"calibration/coverage@10%": 0.026174938725490194,
|
|
"calibration/coverage@15%": 0.057815563725490196,
|
|
"calibration/coverage@20%": 0.1596936274509804,
|
|
"calibration/coverage@25%": 0.24418198529411766,
|
|
"calibration/coverage@30%": 0.3165104166666667,
|
|
"calibration/coverage@5%": 0.011721813725490197,
|
|
"calibration/ece": 0.12675503829656865,
|
|
"calibration/mean_confidence": 0.4814181510416667,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 788.8,
|
|
"completions/max_terminated_length": 663.4,
|
|
"completions/mean_length": 240.012890625,
|
|
"completions/mean_terminated_length": 239.75939025878907,
|
|
"completions/min_length": 121.2,
|
|
"completions/min_terminated_length": 121.2,
|
|
"epoch": 0.928,
|
|
"grad_norm": 0.0005975121166557074,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 995489043.0,
|
|
"reward": 0.9937048196792603,
|
|
"reward_std": 0.06352801769971847,
|
|
"rewards/accuracy_reward": 0.52548828125,
|
|
"rewards/brier_reward": 0.7934018492698669,
|
|
"rewards/confidence_uniqueness_reward": 0.9475328207015992,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0023260547081008554,
|
|
"rewards/frontier_coverage_1": 0.14467951208353041,
|
|
"rewards/frontier_coverage_10": 0.14467951208353041,
|
|
"rewards/frontier_coverage_15": 0.12346935272216797,
|
|
"rewards/frontier_coverage_20": 0.09609992057085037,
|
|
"rewards/frontier_coverage_25": 0.07109370082616806,
|
|
"rewards/frontier_coverage_5": 0.14467951208353041,
|
|
"rewards/frontier_ece_reward": 0.00408800826407969,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.078668212890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11027546375989913,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0393341064453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0393341064453125,
|
|
"signal/advantage_abs_mean": 0.04622294753789902,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04622294753789902,
|
|
"signal/advantage_pre_scale_std": 0.09103738218545913,
|
|
"signal/advantage_std": 0.09103738218545913,
|
|
"signal/brier_reward/centered_abs_mean": 0.10818531513214111,
|
|
"signal/brier_reward/group_std_mean": 0.1421953484416008,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013523164391517638,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013523164391517638,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02333177290856838,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.029623343050479888,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029164716135710476,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029164716135710476,
|
|
"signal/format_reward/centered_abs_mean": 0.0003662109375,
|
|
"signal/format_reward/group_std_mean": 0.000768545875325799,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00018310546875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017985031008720398,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002986391820013523,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.219320460630115e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.219320460630115e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1464044988155365,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19371420741081238,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002620640443637967,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002620640443637967,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1464044988155365,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19371420741081238,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002620640443637967,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002620640443637967,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12273292541503907,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1628485679626465,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002196919359266758,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002196919359266758,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09006080776453018,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1197909340262413,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016120884567499161,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016120884567499161,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06147329062223435,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08052114397287369,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011003718711435795,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011003718711435795,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1464044988155365,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19371420741081238,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002620640443637967,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002620640443637967,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004189421329647303,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005466786120086909,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005236776662059129,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005236776662059129,
|
|
"step": 290
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2359112787034589,
|
|
"calibration/batch_distribution_entropy": 0.9577303840353146,
|
|
"calibration/buffer_distribution_entropy": 0.955857059591185,
|
|
"calibration/confidence_entropy": 0.4517232801906215,
|
|
"calibration/coverage@0%": 0.067578125,
|
|
"calibration/coverage@1%": 0.08125,
|
|
"calibration/coverage@10%": 0.25234375,
|
|
"calibration/coverage@15%": 0.3578125,
|
|
"calibration/coverage@20%": 0.443359375,
|
|
"calibration/coverage@25%": 0.533203125,
|
|
"calibration/coverage@30%": 0.626953125,
|
|
"calibration/coverage@5%": 0.165625,
|
|
"calibration/ece": 0.11354453125,
|
|
"calibration/mean_confidence": 0.47161171875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 481.6,
|
|
"completions/max_terminated_length": 481.6,
|
|
"completions/mean_length": 233.83076171875,
|
|
"completions/mean_terminated_length": 233.83076171875,
|
|
"completions/min_length": 116.8,
|
|
"completions/min_terminated_length": 116.8,
|
|
"epoch": 0.944,
|
|
"grad_norm": 0.0009692126768641174,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 1012858894.0,
|
|
"reward": 0.999471652507782,
|
|
"reward_std": 0.0703015498816967,
|
|
"rewards/accuracy_reward": 0.53798828125,
|
|
"rewards/brier_reward": 0.7908595085144043,
|
|
"rewards/confidence_uniqueness_reward": 0.9465713500976562,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002295452752150595,
|
|
"rewards/frontier_coverage_1": 0.1418714702129364,
|
|
"rewards/frontier_coverage_10": 0.1418314516544342,
|
|
"rewards/frontier_coverage_15": 0.12424526810646057,
|
|
"rewards/frontier_coverage_20": 0.0976836234331131,
|
|
"rewards/frontier_coverage_25": 0.06969998776912689,
|
|
"rewards/frontier_coverage_5": 0.1418714702129364,
|
|
"rewards/frontier_ece_reward": 0.00401430269703269,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.104327392578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.13720910102128983,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0521636962890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0521636962890625,
|
|
"signal/advantage_abs_mean": 0.053348977118730545,
|
|
"signal/advantage_pre_scale_abs_mean": 0.053348977118730545,
|
|
"signal/advantage_pre_scale_std": 0.10006897747516633,
|
|
"signal/advantage_std": 0.10006897747516633,
|
|
"signal/brier_reward/centered_abs_mean": 0.10836757719516754,
|
|
"signal/brier_reward/group_std_mean": 0.1396089732646942,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013545947149395943,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013545947149395943,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023391056060791015,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02954912818968296,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002923882007598877,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002923882007598877,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015608920832164586,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0024038115050643684,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.79399668215774e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.79399668215774e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15688484013080597,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20480249226093292,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028082385659217836,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028082385659217836,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15675508975982666,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2046307384967804,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028059160336852073,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028059160336852073,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1311119645833969,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1709737718105316,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023469041101634503,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023469041101634503,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0950249582529068,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12448285669088363,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017009467585012317,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017009467585012317,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0624612458050251,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0814983144402504,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011180563131347298,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011180563131347298,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15688484013080597,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20480249226093292,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028082385659217836,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028082385659217836,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00407049129717052,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005348560772836209,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000508811412146315,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000508811412146315,
|
|
"step": 295
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3216802000684701,
|
|
"calibration/batch_distribution_entropy": 0.9327274087606267,
|
|
"calibration/buffer_distribution_entropy": 0.9563913914075005,
|
|
"calibration/confidence_entropy": 0.4118774304341909,
|
|
"calibration/coverage@0%": 0.00546875,
|
|
"calibration/coverage@1%": 0.00546875,
|
|
"calibration/coverage@10%": 0.064453125,
|
|
"calibration/coverage@15%": 0.231640625,
|
|
"calibration/coverage@20%": 0.306640625,
|
|
"calibration/coverage@25%": 0.4015625,
|
|
"calibration/coverage@30%": 0.4984375,
|
|
"calibration/coverage@5%": 0.03671875,
|
|
"calibration/ece": 0.14537527941429626,
|
|
"calibration/mean_confidence": 0.5074075330857037,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 513.8,
|
|
"completions/max_terminated_length": 513.8,
|
|
"completions/mean_length": 228.8435546875,
|
|
"completions/mean_terminated_length": 228.8435546875,
|
|
"completions/min_length": 114.4,
|
|
"completions/min_terminated_length": 114.4,
|
|
"epoch": 0.96,
|
|
"grad_norm": 0.0006835410604253411,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 1030142572.0,
|
|
"reward": 0.9945238113403321,
|
|
"reward_std": 0.05708309561014176,
|
|
"rewards/accuracy_reward": 0.51865234375,
|
|
"rewards/brier_reward": 0.8066446900367736,
|
|
"rewards/confidence_uniqueness_reward": 0.94853515625,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0027884629555046557,
|
|
"rewards/frontier_coverage_1": 0.1710768908262253,
|
|
"rewards/frontier_coverage_10": 0.1710768908262253,
|
|
"rewards/frontier_coverage_15": 0.14798834621906282,
|
|
"rewards/frontier_coverage_20": 0.1122938945889473,
|
|
"rewards/frontier_coverage_25": 0.0784688264131546,
|
|
"rewards/frontier_coverage_5": 0.1710768908262253,
|
|
"rewards/frontier_ece_reward": 0.004796722158789635,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.074957275390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10602360963821411,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0374786376953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0374786376953125,
|
|
"signal/advantage_abs_mean": 0.04150651097297668,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04150651097297668,
|
|
"signal/advantage_pre_scale_std": 0.08337944746017456,
|
|
"signal/advantage_std": 0.08337944746017456,
|
|
"signal/brier_reward/centered_abs_mean": 0.10154019445180892,
|
|
"signal/brier_reward/group_std_mean": 0.1345837637782097,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012692524306476115,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012692524306476115,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022342228889465333,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.028145313262939453,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027927786111831667,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027927786111831667,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021448221756145357,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035167032852768897,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.839231430902146e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.839231430902146e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1430927574634552,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19070055186748505,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025613602716475724,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025613602716475724,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1430927574634552,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19070055186748505,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025613602716475724,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025613602716475724,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12214766442775726,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16296298503875734,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021864432375878094,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021864432375878094,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08906677216291428,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11882689893245697,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015942952129989862,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015942952129989862,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06060428842902184,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07976671904325486,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010848167585209012,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010848167585209012,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1430927574634552,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19070055186748505,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025613602716475724,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025613602716475724,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004108147229999304,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00542615270242095,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000513518403749913,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000513518403749913,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"eval_calibration/aurc": 0.502782782980951,
|
|
"eval_calibration/batch_distribution_entropy": 0.9078093664399773,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9551198154788769,
|
|
"eval_calibration/confidence_entropy": 0.43107506961147557,
|
|
"eval_calibration/coverage@0%": 0.0703125,
|
|
"eval_calibration/coverage@1%": 0.0703125,
|
|
"eval_calibration/coverage@10%": 0.0703125,
|
|
"eval_calibration/coverage@15%": 0.09375,
|
|
"eval_calibration/coverage@20%": 0.1015625,
|
|
"eval_calibration/coverage@25%": 0.140625,
|
|
"eval_calibration/coverage@30%": 0.1796875,
|
|
"eval_calibration/coverage@5%": 0.0703125,
|
|
"eval_calibration/ece": 0.20484375,
|
|
"eval_calibration/mean_confidence": 0.45484375,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 356.75,
|
|
"eval_completions/max_terminated_length": 356.75,
|
|
"eval_completions/mean_length": 227.28704071044922,
|
|
"eval_completions/mean_terminated_length": 227.28704071044922,
|
|
"eval_completions/min_length": 122.0,
|
|
"eval_completions/min_terminated_length": 122.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 1030142572.0,
|
|
"eval_reward": 0.9478924721479416,
|
|
"eval_reward_std": 0.22710193321108818,
|
|
"eval_rewards/accuracy_reward": 0.4296875,
|
|
"eval_rewards/brier_reward": 0.803790807723999,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.894287109375,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0038025410613045096,
|
|
"eval_rewards/frontier_coverage_1": 0.23142481595277786,
|
|
"eval_rewards/frontier_coverage_10": 0.2294025495648384,
|
|
"eval_rewards/frontier_coverage_15": 0.19725340977311134,
|
|
"eval_rewards/frontier_coverage_20": 0.14543926157057285,
|
|
"eval_rewards/frontier_coverage_25": 0.09397028014063835,
|
|
"eval_rewards/frontier_coverage_5": 0.23142481595277786,
|
|
"eval_rewards/frontier_ece_reward": 0.005195607780478895,
|
|
"eval_runtime": 19.8919,
|
|
"eval_samples_per_second": 25.136,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.46728515625,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.490493468940258,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.233642578125,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.233642578125,
|
|
"eval_signal/advantage_abs_mean": 0.20863738283514977,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20863738283514977,
|
|
"eval_signal/advantage_pre_scale_std": 0.22465674951672554,
|
|
"eval_signal/advantage_std": 0.22465674951672554,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2109249383211136,
|
|
"eval_signal/brier_reward/group_std_mean": 0.264465369284153,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0263656172901392,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0263656172901392,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0487518310546875,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06017216946929693,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0060939788818359375,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0060939788818359375,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005303551617544144,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.011191037716343999,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.493357356404886e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.493357356404886e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.394241102039814,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.47531820833683014,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.007056915084831417,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.007056915084831417,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.39137494564056396,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.47202398627996445,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00700561108533293,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00700561108533293,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3299722671508789,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.400464303791523,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005906503647565842,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005906503647565842,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.22924000769853592,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.2813456952571869,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004103396320715547,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004103396320715547,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.12824426405131817,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.16049236804246902,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022955723688937724,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022955723688937724,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.394241102039814,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.47531820833683014,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.007056915084831417,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.007056915084831417,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.008114422438666224,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.009916237089782953,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001014302804833278,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001014302804833278,
|
|
"eval_steps_per_second": 0.201,
|
|
"step": 300
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25771066147986865,
|
|
"calibration/batch_distribution_entropy": 0.9374598895353655,
|
|
"calibration/buffer_distribution_entropy": 0.9549208391660825,
|
|
"calibration/confidence_entropy": 0.44294141392762426,
|
|
"calibration/coverage@0%": 0.03359375,
|
|
"calibration/coverage@1%": 0.03359375,
|
|
"calibration/coverage@10%": 0.28828125,
|
|
"calibration/coverage@15%": 0.4203125,
|
|
"calibration/coverage@20%": 0.476171875,
|
|
"calibration/coverage@25%": 0.54296875,
|
|
"calibration/coverage@30%": 0.59921875,
|
|
"calibration/coverage@5%": 0.06640625,
|
|
"calibration/ece": 0.14654739762244245,
|
|
"calibration/mean_confidence": 0.5065541648775576,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 718.4,
|
|
"completions/max_terminated_length": 519.8,
|
|
"completions/mean_length": 226.2775390625,
|
|
"completions/mean_terminated_length": 226.14942321777343,
|
|
"completions/min_length": 109.4,
|
|
"completions/min_terminated_length": 109.4,
|
|
"epoch": 0.976,
|
|
"grad_norm": 0.0014678977895528078,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 1047320774.0,
|
|
"reward": 1.007277262210846,
|
|
"reward_std": 0.06227183118462563,
|
|
"rewards/accuracy_reward": 0.54677734375,
|
|
"rewards/brier_reward": 0.8078455209732056,
|
|
"rewards/confidence_uniqueness_reward": 0.9517561435699463,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002812092285603285,
|
|
"rewards/frontier_coverage_1": 0.1481065958738327,
|
|
"rewards/frontier_coverage_10": 0.14778946116566657,
|
|
"rewards/frontier_coverage_15": 0.13327017948031425,
|
|
"rewards/frontier_coverage_20": 0.10284108966588974,
|
|
"rewards/frontier_coverage_25": 0.07239802479743958,
|
|
"rewards/frontier_coverage_5": 0.1481065958738327,
|
|
"rewards/frontier_ece_reward": 0.004540855251252651,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.085162353515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1166534885764122,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0425811767578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0425811767578125,
|
|
"signal/advantage_abs_mean": 0.045932318270206454,
|
|
"signal/advantage_pre_scale_abs_mean": 0.045932318270206454,
|
|
"signal/advantage_pre_scale_std": 0.08938146680593491,
|
|
"signal/advantage_std": 0.08938146680593491,
|
|
"signal/brier_reward/centered_abs_mean": 0.1009139209985733,
|
|
"signal/brier_reward/group_std_mean": 0.13128523528575897,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012614240124821662,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012614240124821662,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020480955392122267,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02599894180893898,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025601194240152834,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025601194240152834,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002267425088211894,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003777716076001525,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.058690792589914e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.058690792589914e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1478489577770233,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19217921793460846,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026464962400496008,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026464962400496008,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1464183211326599,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19032938480377198,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002620887756347656,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002620887756347656,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1232375368475914,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16054988354444505,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002205951721407473,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002205951721407473,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08783506155014038,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11469702571630477,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015722476178780197,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015722476178780197,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05822276622056961,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07532109916210175,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010421874932944775,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010421874932944775,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1478489577770233,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19217921793460846,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026464962400496008,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026464962400496008,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0042947923298925165,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005507392250001431,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005368490412365646,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005368490412365646,
|
|
"step": 305
|
|
},
|
|
{
|
|
"calibration/aurc": 0.36195064149212575,
|
|
"calibration/batch_distribution_entropy": 0.9291159530510076,
|
|
"calibration/buffer_distribution_entropy": 0.9555400002138736,
|
|
"calibration/confidence_entropy": 0.4221106847869228,
|
|
"calibration/coverage@0%": 0.007814031862745098,
|
|
"calibration/coverage@1%": 0.007814031862745098,
|
|
"calibration/coverage@10%": 0.058985906862745095,
|
|
"calibration/coverage@15%": 0.0882827818627451,
|
|
"calibration/coverage@20%": 0.1601577818627451,
|
|
"calibration/coverage@25%": 0.32345894607843134,
|
|
"calibration/coverage@30%": 0.4137530637254902,
|
|
"calibration/coverage@5%": 0.0328140318627451,
|
|
"calibration/ece": 0.15175038551879086,
|
|
"calibration/mean_confidence": 0.4563323503880719,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 833.2,
|
|
"completions/max_terminated_length": 634.0,
|
|
"completions/mean_length": 221.01396484375,
|
|
"completions/mean_terminated_length": 220.75893859863282,
|
|
"completions/min_length": 109.8,
|
|
"completions/min_terminated_length": 109.8,
|
|
"epoch": 0.992,
|
|
"grad_norm": 0.0008075599907897413,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 1064712437.0,
|
|
"reward": 0.9926512241363525,
|
|
"reward_std": 0.05921575650572777,
|
|
"rewards/accuracy_reward": 0.51943359375,
|
|
"rewards/brier_reward": 0.8002906322479248,
|
|
"rewards/confidence_uniqueness_reward": 0.9479501247406006,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0027773221023380756,
|
|
"rewards/frontier_coverage_1": 0.15749771595001222,
|
|
"rewards/frontier_coverage_10": 0.15701024532318114,
|
|
"rewards/frontier_coverage_15": 0.13756523728370668,
|
|
"rewards/frontier_coverage_20": 0.10082580447196961,
|
|
"rewards/frontier_coverage_25": 0.07283035963773728,
|
|
"rewards/frontier_coverage_5": 0.15749771595001222,
|
|
"rewards/frontier_ece_reward": 0.004255708307027817,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.079058837890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10680015832185745,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0395294189453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0395294189453125,
|
|
"signal/advantage_abs_mean": 0.043850655853748324,
|
|
"signal/advantage_pre_scale_abs_mean": 0.043850655853748324,
|
|
"signal/advantage_pre_scale_std": 0.08708179742097855,
|
|
"signal/advantage_std": 0.08708179742097855,
|
|
"signal/brier_reward/centered_abs_mean": 0.1025318220257759,
|
|
"signal/brier_reward/group_std_mean": 0.13148369193077086,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012816477753221988,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012816477753221988,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02215721495449543,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02793830633163452,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027696518693119286,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027696518693119286,
|
|
"signal/format_reward/centered_abs_mean": 0.0003662109375,
|
|
"signal/format_reward/group_std_mean": 0.000768545875325799,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00018310546875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021893641911447047,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0037097081542015074,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9189616654766726e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9189616654766726e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14424746930599214,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.184622061252594,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002582029718905687,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002582029718905687,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14393795430660247,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18423262238502502,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025764893274754287,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025764893274754287,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12318403720855713,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15762063413858413,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002204994112253189,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002204994112253189,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08359026908874512,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10689203143119812,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014962658053264022,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014962658053264022,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.057026924937963484,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07260482162237167,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010207819053903223,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010207819053903223,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14424746930599214,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.184622061252594,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002582029718905687,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002582029718905687,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004057144792750478,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005179398506879806,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005071430990938097,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005071430990938097,
|
|
"step": 310
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2747143001846701,
|
|
"calibration/batch_distribution_entropy": 0.8806812013835696,
|
|
"calibration/buffer_distribution_entropy": 0.9564232842329621,
|
|
"calibration/confidence_entropy": 0.4171977146312076,
|
|
"calibration/coverage@0%": 0.017578125,
|
|
"calibration/coverage@1%": 0.017578125,
|
|
"calibration/coverage@10%": 0.095703125,
|
|
"calibration/coverage@15%": 0.1357421875,
|
|
"calibration/coverage@20%": 0.158203125,
|
|
"calibration/coverage@25%": 0.5341796875,
|
|
"calibration/coverage@30%": 0.6318359375,
|
|
"calibration/coverage@5%": 0.0537109375,
|
|
"calibration/ece": 0.17599609375000003,
|
|
"calibration/mean_confidence": 0.6320703125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000244140625,
|
|
"completions/max_length": 995.0,
|
|
"completions/max_terminated_length": 803.5,
|
|
"completions/mean_length": 220.93310546875,
|
|
"completions/mean_terminated_length": 220.61163330078125,
|
|
"completions/min_length": 98.5,
|
|
"completions/min_terminated_length": 98.5,
|
|
"epoch": 0.9984,
|
|
"num_tokens": 1071619903.0,
|
|
"reward": 0.9982321858406067,
|
|
"reward_std": 0.06220795214176178,
|
|
"rewards/accuracy_reward": 0.54638671875,
|
|
"rewards/brier_reward": 0.7730526924133301,
|
|
"rewards/confidence_uniqueness_reward": 0.954784631729126,
|
|
"rewards/format_reward": 0.999755859375,
|
|
"rewards/frontier_aurc_reward": -0.0029595731757581234,
|
|
"rewards/frontier_coverage_1": 0.10051992163062096,
|
|
"rewards/frontier_coverage_10": 0.10056523606181145,
|
|
"rewards/frontier_coverage_15": 0.07943737879395485,
|
|
"rewards/frontier_coverage_20": 0.06294701993465424,
|
|
"rewards/frontier_coverage_25": 0.04942700266838074,
|
|
"rewards/frontier_coverage_5": 0.10051992163062096,
|
|
"rewards/frontier_ece_reward": 0.003216548007912934,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.073822021484375,
|
|
"signal/accuracy_reward/group_std_mean": 0.10335757955908775,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6796875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0369110107421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0369110107421875,
|
|
"signal/advantage_abs_mean": 0.046172238886356354,
|
|
"signal/advantage_pre_scale_abs_mean": 0.046172238886356354,
|
|
"signal/advantage_pre_scale_std": 0.09168939664959908,
|
|
"signal/advantage_std": 0.09168939664959908,
|
|
"signal/brier_reward/centered_abs_mean": 0.10888796299695969,
|
|
"signal/brier_reward/group_std_mean": 0.1396150141954422,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01361099537461996,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01361099537461996,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.019547984935343266,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.024895640090107918,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002443498116917908,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002443498116917908,
|
|
"signal/format_reward/centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/group_std_mean": 0.0013810679083690047,
|
|
"signal/format_reward/group_zero_std_frac": 0.9921875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00023651123046875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00023651123046875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022028597304597497,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00342005817219615,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.943118827010039e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.943118827010039e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.134110227227211,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17590243369340897,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024005728773772717,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024005728773772717,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13338283449411392,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1749531328678131,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023875526385381818,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023875526385381818,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.114329993724823,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.14955615997314453,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020465069683268666,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020465069683268666,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07507089525461197,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09895683825016022,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001343769021332264,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001343769021332264,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.051243193447589874,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06748097017407417,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009172531717922539,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009172531717922539,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.134110227227211,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17590243369340897,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024005728773772717,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024005728773772717,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00416590110398829,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005485004745423794,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005207376379985362,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005207376379985362,
|
|
"step": 312,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.004686371226200255,
|
|
"train_runtime": 60660.1417,
|
|
"train_samples_per_second": 0.33,
|
|
"train_steps_per_second": 0.005
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 312,
|
|
"num_input_tokens_seen": 1071619903,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|