6365 lines
407 KiB
JSON
6365 lines
407 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.49919376007799904,
|
|
"eval_steps": 50,
|
|
"global_step": 208,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.5119607631252925,
|
|
"calibration/batch_distribution_entropy": 0.2767451001971738,
|
|
"calibration/confidence_entropy": 0.22007440379474952,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.47048212152499086,
|
|
"calibration/mean_confidence": 0.9157685071277196,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.018576388888888885,
|
|
"completions/max_length": 4042.4,
|
|
"completions/max_terminated_length": 4042.4,
|
|
"completions/mean_length": 517.6447082519531,
|
|
"completions/mean_terminated_length": 527.4365112304688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.011999850001874977,
|
|
"grad_norm": 0.005689694546163082,
|
|
"learning_rate": 5.952380952380953e-07,
|
|
"loss": 0.0074,
|
|
"num_tokens": 9077475.0,
|
|
"reward": 0.6640230894088746,
|
|
"reward_std": 0.6719910860061645,
|
|
"rewards/accuracy_reward": 0.26406249701976775,
|
|
"rewards/brier_reward": 0.31690160036087034,
|
|
"rewards/confidence_uniqueness_reward": 0.2950827181339264,
|
|
"rewards/format_reward": 0.6027777671813965,
|
|
"rewards/frontier_aurc_reward": 0.2791689395904541,
|
|
"rewards/frontier_ece_reward": 0.2791689395904541,
|
|
"rewards/frontier_entropy_batch_reward": -0.5742027401924134,
|
|
"rewards/volume_coverage_0": 0.2791689395904541,
|
|
"rewards/volume_coverage_1": 0.2791689395904541,
|
|
"rewards/volume_coverage_10": 0.2791689395904541,
|
|
"rewards/volume_coverage_15": 0.2791689395904541,
|
|
"rewards/volume_coverage_20": 0.2791689395904541,
|
|
"rewards/volume_coverage_25": 0.2791689395904541,
|
|
"rewards/volume_coverage_5": 0.2791689395904541,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.31510416865348817,
|
|
"signal/accuracy_reward/group_std_mean": 0.374676376581192,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.08055555745959282,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15755208432674409,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15755208432674409,
|
|
"signal/advantage_abs_mean": 0.5728595376014709,
|
|
"signal/advantage_pre_scale_abs_mean": 0.5728595376014709,
|
|
"signal/advantage_pre_scale_std": 0.6879928708076477,
|
|
"signal/advantage_std": 0.6879928708076477,
|
|
"signal/brier_reward/centered_abs_mean": 0.3253703832626343,
|
|
"signal/brier_reward/group_std_mean": 0.3777146339416504,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03253703787922859,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03253703787922859,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.23825904428958894,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2890412747859955,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023825905472040176,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023825905472040176,
|
|
"signal/format_reward/centered_abs_mean": 0.44292533993721006,
|
|
"signal/format_reward/group_std_mean": 0.47658112049102785,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.22146266996860503,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.22146266996860503,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.31583258509635925,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3733566999435425,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003947907360270619,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003947907360270619,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.31583258509635925,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3733566999435425,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.45222132802009585,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.483779114484787,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04522213339805603,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04522213339805603,
|
|
"signal/volume_coverage_0/centered_abs_mean": 0.31583258509635925,
|
|
"signal/volume_coverage_0/group_std_mean": 0.3733566999435425,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/volume_coverage_1/centered_abs_mean": 0.31583258509635925,
|
|
"signal/volume_coverage_1/group_std_mean": 0.3733566999435425,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/volume_coverage_10/centered_abs_mean": 0.31583258509635925,
|
|
"signal/volume_coverage_10/group_std_mean": 0.3733566999435425,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/volume_coverage_15/centered_abs_mean": 0.31583258509635925,
|
|
"signal/volume_coverage_15/group_std_mean": 0.3733566999435425,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.31583258509635925,
|
|
"signal/volume_coverage_20/group_std_mean": 0.3733566999435425,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.31583258509635925,
|
|
"signal/volume_coverage_25/group_std_mean": 0.3733566999435425,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/volume_coverage_5/centered_abs_mean": 0.31583258509635925,
|
|
"signal/volume_coverage_5/group_std_mean": 0.3733566999435425,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 0.031583258882164955,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 0.031583258882164955,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5121039943055846,
|
|
"calibration/batch_distribution_entropy": 0.2556783321589634,
|
|
"calibration/confidence_entropy": 0.22334692393582234,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.46985558555759993,
|
|
"calibration/mean_confidence": 0.9213281837751548,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.016840277777777767,
|
|
"completions/max_length": 3861.8,
|
|
"completions/max_terminated_length": 3861.8,
|
|
"completions/mean_length": 478.6218017578125,
|
|
"completions/mean_terminated_length": 487.04514770507814,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 14.2,
|
|
"epoch": 0.023999700003749954,
|
|
"grad_norm": 0.005293714813888073,
|
|
"learning_rate": 1.1904761904761906e-06,
|
|
"loss": 0.0012,
|
|
"num_tokens": 17673918.0,
|
|
"reward": 0.7621858239173889,
|
|
"reward_std": 0.6452823042869568,
|
|
"rewards/accuracy_reward": 0.29513888955116274,
|
|
"rewards/brier_reward": 0.35790597200393676,
|
|
"rewards/confidence_uniqueness_reward": 0.3535000741481781,
|
|
"rewards/format_reward": 0.7201388716697693,
|
|
"rewards/frontier_aurc_reward": 0.3103605091571808,
|
|
"rewards/frontier_ece_reward": 0.3103605091571808,
|
|
"rewards/frontier_entropy_batch_reward": -0.6876158952713013,
|
|
"rewards/volume_coverage_0": 0.3103605091571808,
|
|
"rewards/volume_coverage_1": 0.3103605091571808,
|
|
"rewards/volume_coverage_10": 0.3103605091571808,
|
|
"rewards/volume_coverage_15": 0.3103605091571808,
|
|
"rewards/volume_coverage_20": 0.3103605091571808,
|
|
"rewards/volume_coverage_25": 0.3103605091571808,
|
|
"rewards/volume_coverage_5": 0.3103605091571808,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.324945741891861,
|
|
"signal/accuracy_reward/group_std_mean": 0.38479640483856203,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.06666666939854622,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1624728709459305,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1624728709459305,
|
|
"signal/advantage_abs_mean": 0.5411736607551575,
|
|
"signal/advantage_pre_scale_abs_mean": 0.5411736607551575,
|
|
"signal/advantage_pre_scale_std": 0.6594692945480347,
|
|
"signal/advantage_std": 0.6594692945480347,
|
|
"signal/brier_reward/centered_abs_mean": 0.3189652979373932,
|
|
"signal/brier_reward/group_std_mean": 0.37320741415023806,
|
|
"signal/brier_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031896531209349634,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.031896531209349634,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2224169671535492,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2778456211090088,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02224169746041298,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02224169746041298,
|
|
"signal/format_reward/centered_abs_mean": 0.35045573115348816,
|
|
"signal/format_reward/group_std_mean": 0.41651219725608823,
|
|
"signal/format_reward/group_zero_std_frac": 0.008333333395421505,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.17522786557674408,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.17522786557674408,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.3180912435054779,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.37567706108093263,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00397614068351686,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00397614068351686,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.3180912435054779,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.37567706108093263,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3786880075931549,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.43963631987571716,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03786880299448967,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03786880299448967,
|
|
"signal/volume_coverage_0/centered_abs_mean": 0.3180912435054779,
|
|
"signal/volume_coverage_0/group_std_mean": 0.37567706108093263,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/volume_coverage_1/centered_abs_mean": 0.3180912435054779,
|
|
"signal/volume_coverage_1/group_std_mean": 0.37567706108093263,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/volume_coverage_10/centered_abs_mean": 0.3180912435054779,
|
|
"signal/volume_coverage_10/group_std_mean": 0.37567706108093263,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/volume_coverage_15/centered_abs_mean": 0.3180912435054779,
|
|
"signal/volume_coverage_15/group_std_mean": 0.37567706108093263,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.3180912435054779,
|
|
"signal/volume_coverage_20/group_std_mean": 0.37567706108093263,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.3180912435054779,
|
|
"signal/volume_coverage_25/group_std_mean": 0.37567706108093263,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/volume_coverage_5/centered_abs_mean": 0.3180912435054779,
|
|
"signal/volume_coverage_5/group_std_mean": 0.37567706108093263,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 0.03180912546813488,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 0.03180912546813488,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5135904311928525,
|
|
"calibration/batch_distribution_entropy": 0.281091964786097,
|
|
"calibration/confidence_entropy": 0.22602702961388318,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.48452310285305156,
|
|
"calibration/mean_confidence": 0.9164284363135969,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009895833333333326,
|
|
"completions/max_length": 3757.0,
|
|
"completions/max_terminated_length": 3757.0,
|
|
"completions/mean_length": 459.0904541015625,
|
|
"completions/mean_terminated_length": 463.7198120117188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 64.8,
|
|
"epoch": 0.03599955000562493,
|
|
"grad_norm": 0.00195339135825634,
|
|
"learning_rate": 1.7857142857142859e-06,
|
|
"loss": -0.0134,
|
|
"num_tokens": 26064624.0,
|
|
"reward": 0.9528237581253052,
|
|
"reward_std": 0.5587735056877137,
|
|
"rewards/accuracy_reward": 0.3482638895511627,
|
|
"rewards/brier_reward": 0.44555225372314455,
|
|
"rewards/confidence_uniqueness_reward": 0.49732959270477295,
|
|
"rewards/format_reward": 0.9381944417953492,
|
|
"rewards/frontier_aurc_reward": 0.3752441704273224,
|
|
"rewards/frontier_ece_reward": 0.3752441704273224,
|
|
"rewards/frontier_entropy_batch_reward": -0.8957948088645935,
|
|
"rewards/volume_coverage_0": 0.3752441704273224,
|
|
"rewards/volume_coverage_1": 0.3752441704273224,
|
|
"rewards/volume_coverage_10": 0.3752441704273224,
|
|
"rewards/volume_coverage_15": 0.3752441704273224,
|
|
"rewards/volume_coverage_20": 0.3752441704273224,
|
|
"rewards/volume_coverage_25": 0.3752441704273224,
|
|
"rewards/volume_coverage_5": 0.3752441704273224,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.32180989980697633,
|
|
"signal/accuracy_reward/group_std_mean": 0.38036916255950926,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.07500000223517418,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16090494990348816,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.16090494990348816,
|
|
"signal/advantage_abs_mean": 0.46738156080245974,
|
|
"signal/advantage_pre_scale_abs_mean": 0.46738156080245974,
|
|
"signal/advantage_pre_scale_std": 0.5751462697982788,
|
|
"signal/advantage_std": 0.5751462697982788,
|
|
"signal/brier_reward/centered_abs_mean": 0.30256916880607604,
|
|
"signal/brier_reward/group_std_mean": 0.3543079555034637,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030256916582584382,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.030256916582584382,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1871120035648346,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2356875717639923,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01871120072901249,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01871120072901249,
|
|
"signal/format_reward/centered_abs_mean": 0.10648871511220932,
|
|
"signal/format_reward/group_std_mean": 0.18728102892637252,
|
|
"signal/format_reward/group_zero_std_frac": 0.29444444477558135,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05324435755610466,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.05324435755610466,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.314031195640564,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3691504061222076,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003925389749929309,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003925389749929309,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.314031195640564,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3691504061222076,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17255694419145584,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2781273782253265,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0888888917863369,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017255694791674613,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017255694791674613,
|
|
"signal/volume_coverage_0/centered_abs_mean": 0.314031195640564,
|
|
"signal/volume_coverage_0/group_std_mean": 0.3691504061222076,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/volume_coverage_1/centered_abs_mean": 0.314031195640564,
|
|
"signal/volume_coverage_1/group_std_mean": 0.3691504061222076,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/volume_coverage_10/centered_abs_mean": 0.314031195640564,
|
|
"signal/volume_coverage_10/group_std_mean": 0.3691504061222076,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/volume_coverage_15/centered_abs_mean": 0.314031195640564,
|
|
"signal/volume_coverage_15/group_std_mean": 0.3691504061222076,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.314031195640564,
|
|
"signal/volume_coverage_20/group_std_mean": 0.3691504061222076,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.314031195640564,
|
|
"signal/volume_coverage_25/group_std_mean": 0.3691504061222076,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/volume_coverage_5/centered_abs_mean": 0.314031195640564,
|
|
"signal/volume_coverage_5/group_std_mean": 0.3691504061222076,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 0.031403117999434474,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 0.031403117999434474,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4400840846735198,
|
|
"calibration/batch_distribution_entropy": 0.38120191367823736,
|
|
"calibration/buffer_distribution_entropy": 0.29442792961319336,
|
|
"calibration/confidence_entropy": 0.29531402386614836,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.2295914079238342,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.36975757858520253,
|
|
"calibration/mean_confidence": 0.8869711695530038,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00894097222222221,
|
|
"completions/max_length": 4041.4,
|
|
"completions/max_terminated_length": 4041.4,
|
|
"completions/mean_length": 495.19219360351565,
|
|
"completions/mean_terminated_length": 499.751171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 96.0,
|
|
"epoch": 0.04799940000749991,
|
|
"grad_norm": 0.0007564106490463018,
|
|
"learning_rate": 2.380952380952381e-06,
|
|
"loss": -0.0063,
|
|
"num_tokens": 34882934.0,
|
|
"reward": 0.8950790762901306,
|
|
"reward_std": 0.34014883935451506,
|
|
"rewards/accuracy_reward": 0.4636284828186035,
|
|
"rewards/brier_reward": 0.5718536376953125,
|
|
"rewards/confidence_uniqueness_reward": 0.5620216369628906,
|
|
"rewards/format_reward": 0.9834201335906982,
|
|
"rewards/frontier_aurc_reward": 0.18470853520557284,
|
|
"rewards/frontier_ece_reward": 0.18528626561164857,
|
|
"rewards/frontier_entropy_batch_reward": -0.9422903299331665,
|
|
"rewards/volume_coverage_0": 0.18794108111264332,
|
|
"rewards/volume_coverage_1": 0.18794108111264332,
|
|
"rewards/volume_coverage_10": 0.18794108111264332,
|
|
"rewards/volume_coverage_15": 0.18794108169834006,
|
|
"rewards/volume_coverage_20": 0.1879410865440863,
|
|
"rewards/volume_coverage_25": 0.18794108712614993,
|
|
"rewards/volume_coverage_5": 0.18794108111264332,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2878743469715118,
|
|
"signal/accuracy_reward/group_std_mean": 0.35547043681144713,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.09166666865348816,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1439371734857559,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1439371734857559,
|
|
"signal/advantage_abs_mean": 0.272026863694191,
|
|
"signal/advantage_pre_scale_abs_mean": 0.272026863694191,
|
|
"signal/advantage_pre_scale_std": 0.3537022441625595,
|
|
"signal/advantage_std": 0.3537022441625595,
|
|
"signal/brier_reward/centered_abs_mean": 0.2541215270757675,
|
|
"signal/brier_reward/group_std_mean": 0.31329566836357114,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025412153080105783,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.025412153080105783,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.19161063432693481,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2286382406949997,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019161063805222513,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019161063805222513,
|
|
"signal/format_reward/centered_abs_mean": 0.030018445663154127,
|
|
"signal/format_reward/group_std_mean": 0.06528293080627919,
|
|
"signal/format_reward/group_zero_std_frac": 0.7027777791023254,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015009222831577063,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.015009222831577063,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.11436173026449978,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.14147313190624117,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0014295217762992252,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0014295217762992252,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.19664273262023926,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.23796773850917816,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.019664275087416173,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.019664275087416173,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10116236060857772,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.18831903338432313,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.30555555522441863,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01011623591184616,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01011623591184616,
|
|
"signal/volume_coverage_0/centered_abs_mean": 0.11197410996287056,
|
|
"signal/volume_coverage_0/group_std_mean": 0.1381310252406534,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.5555555582046509,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 0.011197412188379958,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 0.011197412188379958,
|
|
"signal/volume_coverage_1/centered_abs_mean": 0.11197410996287056,
|
|
"signal/volume_coverage_1/group_std_mean": 0.1381310252406534,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.5555555582046509,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 0.011197412188379958,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 0.011197412188379958,
|
|
"signal/volume_coverage_10/centered_abs_mean": 0.11197410996287056,
|
|
"signal/volume_coverage_10/group_std_mean": 0.1381310252406534,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.5555555582046509,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 0.011197412188379958,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 0.011197412188379958,
|
|
"signal/volume_coverage_15/centered_abs_mean": 0.11197411371267538,
|
|
"signal/volume_coverage_15/group_std_mean": 0.13813103191433065,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.4888888955116272,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.011197412563360465,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 0.011197412563360465,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.11197412589439945,
|
|
"signal/volume_coverage_20/group_std_mean": 0.13813105202186832,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.4888888955116272,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.011197413781532606,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.011197413781532606,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.11197412752295328,
|
|
"signal/volume_coverage_25/group_std_mean": 0.13813105459340544,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.4888888955116272,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.011197413944388224,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.011197413944388224,
|
|
"signal/volume_coverage_5/centered_abs_mean": 0.11197410996287056,
|
|
"signal/volume_coverage_5/group_std_mean": 0.1381310252406534,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.5555555582046509,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 0.011197412188379958,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 0.011197412188379958,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3659491213453068,
|
|
"calibration/batch_distribution_entropy": 0.4747177255582935,
|
|
"calibration/buffer_distribution_entropy": 0.33521721039781777,
|
|
"calibration/confidence_entropy": 0.32741087611064146,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.019270833333333334,
|
|
"calibration/coverage@20%": 0.019270833333333334,
|
|
"calibration/coverage@25%": 0.21563344594594597,
|
|
"calibration/coverage@30%": 0.3589189189189189,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.28585367982933974,
|
|
"calibration/mean_confidence": 0.8670943425313327,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011284722222222232,
|
|
"completions/max_length": 3761.6,
|
|
"completions/max_terminated_length": 3761.6,
|
|
"completions/mean_length": 535.991943359375,
|
|
"completions/mean_terminated_length": 542.1177734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 105.8,
|
|
"epoch": 0.05999925000937488,
|
|
"grad_norm": 0.0008135500829666853,
|
|
"learning_rate": 2.9761904761904763e-06,
|
|
"loss": -0.0054,
|
|
"num_tokens": 44182009.0,
|
|
"reward": 0.797996187210083,
|
|
"reward_std": 0.22051306068897247,
|
|
"rewards/accuracy_reward": 0.5412326335906983,
|
|
"rewards/brier_reward": 0.6488373517990113,
|
|
"rewards/confidence_uniqueness_reward": 0.6406527280807495,
|
|
"rewards/format_reward": 0.9836805462837219,
|
|
"rewards/frontier_aurc_reward": -0.004265864612534642,
|
|
"rewards/frontier_ece_reward": 0.010874219285324217,
|
|
"rewards/frontier_entropy_batch_reward": -0.9444352984428406,
|
|
"rewards/volume_coverage_0": 8.792097189180836e-10,
|
|
"rewards/volume_coverage_1": 8.792097189180836e-10,
|
|
"rewards/volume_coverage_10": 2.2515131453682e-09,
|
|
"rewards/volume_coverage_15": 3.5743737192284187e-09,
|
|
"rewards/volume_coverage_20": 5.067362618405013e-09,
|
|
"rewards/volume_coverage_25": 1.9802690931491895e-08,
|
|
"rewards/volume_coverage_5": 8.792097189180836e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.27207574248313904,
|
|
"signal/accuracy_reward/group_std_mean": 0.33846710324287416,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.11944444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13603787124156952,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.13603787124156952,
|
|
"signal/advantage_abs_mean": 0.1727396160364151,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1727396160364151,
|
|
"signal/advantage_pre_scale_std": 0.23474966883659362,
|
|
"signal/advantage_std": 0.23474966883659362,
|
|
"signal/brier_reward/centered_abs_mean": 0.22204743921756745,
|
|
"signal/brier_reward/group_std_mean": 0.2769301772117615,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022204744815826415,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022204744815826415,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.15974532812833786,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1887336254119873,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015974533185362814,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015974533185362814,
|
|
"signal/format_reward/centered_abs_mean": 0.02893880233168602,
|
|
"signal/format_reward/group_std_mean": 0.058059143275022505,
|
|
"signal/format_reward/group_zero_std_frac": 0.7500000119209289,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01446940116584301,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01446940116584301,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031789666507393123,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004654883686453104,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.973708517150954e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.973708517150954e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.11750385165214539,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.14042254984378816,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011750385351479053,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011750385351479053,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.09630790203809739,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.18353441655635833,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.34166666865348816,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.009630790445953608,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.009630790445953608,
|
|
"signal/volume_coverage_0/centered_abs_mean": 4.802972108919334e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 7.952000213862221e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.7916666626930237,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.802972020101492e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 4.802972020101492e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 4.802972108919334e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 7.952000213862221e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.7916666626930237,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.802972020101492e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 4.802972020101492e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 7.63057674824097e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 1.2189424847264264e-08,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.7777777731418609,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.630577059103416e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 7.630577059103416e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 2.999649408863547e-08,
|
|
"signal/volume_coverage_15/group_std_mean": 5.054619922617576e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.6666666746139527,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.9996494010919862e-09,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.9996494010919862e-09,
|
|
"signal/volume_coverage_20/centered_abs_mean": 5.12322889001382e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 8.749845026301273e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.5916666865348816,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 5.123229118719763e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 5.123229118719763e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 1.4009428435102221e-07,
|
|
"signal/volume_coverage_25/group_std_mean": 2.419580729551285e-07,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.5361111283302307,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.4009429694095133e-08,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.4009429694095133e-08,
|
|
"signal/volume_coverage_5/centered_abs_mean": 4.802972108919334e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 7.952000213862221e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.7916666626930237,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.802972020101492e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 4.802972020101492e-10,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.290252031438701,
|
|
"calibration/batch_distribution_entropy": 0.6670356474414376,
|
|
"calibration/buffer_distribution_entropy": 0.3984471740039156,
|
|
"calibration/confidence_entropy": 0.4580460844401667,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.013860939021464559,
|
|
"calibration/coverage@15%": 0.013860939021464559,
|
|
"calibration/coverage@20%": 0.09637460022365034,
|
|
"calibration/coverage@25%": 0.27948800161988807,
|
|
"calibration/coverage@30%": 0.6027563221881407,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.14154371937498994,
|
|
"calibration/mean_confidence": 0.7846670285807927,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014756944444444442,
|
|
"completions/max_length": 3983.8,
|
|
"completions/max_terminated_length": 3983.8,
|
|
"completions/mean_length": 596.7978393554688,
|
|
"completions/mean_terminated_length": 605.7838012695313,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 131.0,
|
|
"epoch": 0.07199910001124986,
|
|
"grad_norm": 0.0004709550703410059,
|
|
"learning_rate": 3.5714285714285718e-06,
|
|
"loss": -0.0083,
|
|
"num_tokens": 54167040.0,
|
|
"reward": 0.8399909019470215,
|
|
"reward_std": 0.1914364665746689,
|
|
"rewards/accuracy_reward": 0.596788203716278,
|
|
"rewards/brier_reward": 0.7140596985816956,
|
|
"rewards/confidence_uniqueness_reward": 0.7248314023017883,
|
|
"rewards/format_reward": 0.9833333373069764,
|
|
"rewards/frontier_aurc_reward": -0.00332138747908175,
|
|
"rewards/frontier_ece_reward": 0.016705350019037724,
|
|
"rewards/frontier_entropy_batch_reward": -0.9558795094490051,
|
|
"rewards/volume_coverage_0": -9.216823436408727e-10,
|
|
"rewards/volume_coverage_1": -9.216823436408727e-10,
|
|
"rewards/volume_coverage_10": -1.096071113732e-09,
|
|
"rewards/volume_coverage_15": -1.5953446425021055e-09,
|
|
"rewards/volume_coverage_20": -3.5614115323490126e-09,
|
|
"rewards/volume_coverage_25": -3.806372817938275e-07,
|
|
"rewards/volume_coverage_5": -9.216823436408727e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2374620258808136,
|
|
"signal/accuracy_reward/group_std_mean": 0.3019864022731781,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.1833333343267441,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1187310129404068,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1187310129404068,
|
|
"signal/advantage_abs_mean": 0.14635236859321593,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14635236859321593,
|
|
"signal/advantage_pre_scale_std": 0.2144080013036728,
|
|
"signal/advantage_std": 0.2144080013036728,
|
|
"signal/brier_reward/centered_abs_mean": 0.17288005352020264,
|
|
"signal/brier_reward/group_std_mean": 0.21867653727531433,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01728800553828478,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01728800553828478,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0963991716504097,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.12703455239534378,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009639917686581612,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009639917686581612,
|
|
"signal/format_reward/centered_abs_mean": 0.02838541641831398,
|
|
"signal/format_reward/group_std_mean": 0.05298890024423599,
|
|
"signal/format_reward/group_zero_std_frac": 0.7861111283302307,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01419270820915699,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01419270820915699,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019600596046075226,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002990162093192339,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4500745348632334e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4500745348632334e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.07730323448777199,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09534137547016144,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007730323821306229,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007730323821306229,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07685805186629295,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.1518291175365448,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.4444444477558136,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.007685805577784777,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.007685805577784777,
|
|
"signal/volume_coverage_0/centered_abs_mean": 4.0110336330112515e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 5.887272230831186e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9277777791023254,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.0110334810494754e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 4.0110334810494754e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 4.0110336330112515e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 5.887272230831186e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9277777791023254,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.0110334810494754e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 4.0110334810494754e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 4.491473498680065e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 6.584358708261462e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.9277777791023254,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.4914740572610247e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 4.4914740572610247e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 7.2037741438713e-09,
|
|
"signal/volume_coverage_15/group_std_mean": 1.065876080730277e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.8611111044883728,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 7.203774146646857e-10,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 7.203774146646857e-10,
|
|
"signal/volume_coverage_20/centered_abs_mean": 1.8666235868103344e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 2.672936527670089e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.7694444358348846,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.866623697277525e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.866623697277525e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 1.0624473182341276e-06,
|
|
"signal/volume_coverage_25/group_std_mean": 1.5403513770051447e-06,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.6305555611848831,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.0624474773013315e-07,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.0624474773013315e-07,
|
|
"signal/volume_coverage_5/centered_abs_mean": 4.0110336330112515e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 5.887272230831186e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9277777791023254,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.0110334810494754e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 4.0110334810494754e-10,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25534025811798067,
|
|
"calibration/batch_distribution_entropy": 0.6576576613585013,
|
|
"calibration/buffer_distribution_entropy": 0.49322937770285424,
|
|
"calibration/confidence_entropy": 0.5542242160036144,
|
|
"calibration/coverage@0%": 0.002638522427440633,
|
|
"calibration/coverage@1%": 0.002638522427440633,
|
|
"calibration/coverage@10%": 0.05399434253326074,
|
|
"calibration/coverage@15%": 0.14363486724367822,
|
|
"calibration/coverage@20%": 0.22828949830648027,
|
|
"calibration/coverage@25%": 0.4233740844404405,
|
|
"calibration/coverage@30%": 0.7761904761904762,
|
|
"calibration/coverage@5%": 0.002638522427440633,
|
|
"calibration/ece": 0.09020366927006476,
|
|
"calibration/mean_confidence": 0.7040882964997379,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01692708333333335,
|
|
"completions/max_length": 3935.8,
|
|
"completions/max_terminated_length": 3935.8,
|
|
"completions/mean_length": 619.6548706054688,
|
|
"completions/mean_terminated_length": 630.3608642578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 172.0,
|
|
"epoch": 0.08399895001312484,
|
|
"grad_norm": 0.00048439911915920675,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": -0.0091,
|
|
"num_tokens": 64382904.0,
|
|
"reward": 0.8553146839141845,
|
|
"reward_std": 0.17324375808238984,
|
|
"rewards/accuracy_reward": 0.63359375,
|
|
"rewards/brier_reward": 0.7494083523750306,
|
|
"rewards/confidence_uniqueness_reward": 0.6619056224822998,
|
|
"rewards/format_reward": 0.9808159589767456,
|
|
"rewards/frontier_aurc_reward": -0.0026945109479129315,
|
|
"rewards/frontier_ece_reward": 0.012076981551945209,
|
|
"rewards/frontier_entropy_batch_reward": -0.9419560909271241,
|
|
"rewards/volume_coverage_0": -3.125959491256936e-11,
|
|
"rewards/volume_coverage_1": -3.125959491256936e-11,
|
|
"rewards/volume_coverage_10": -9.113416687966946e-11,
|
|
"rewards/volume_coverage_15": 2.0238500209046604e-09,
|
|
"rewards/volume_coverage_20": -1.1439319269612725e-09,
|
|
"rewards/volume_coverage_25": -2.410270205643883e-10,
|
|
"rewards/volume_coverage_5": -3.125959491256936e-11,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.20619032382965088,
|
|
"signal/accuracy_reward/group_std_mean": 0.268117618560791,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2555555611848831,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10309516191482544,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10309516191482544,
|
|
"signal/advantage_abs_mean": 0.12986526787281036,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12986526787281036,
|
|
"signal/advantage_pre_scale_std": 0.19552622437477113,
|
|
"signal/advantage_std": 0.19552622437477113,
|
|
"signal/brier_reward/centered_abs_mean": 0.13110830038785934,
|
|
"signal/brier_reward/group_std_mean": 0.1714950382709503,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013110830076038837,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013110830076038837,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.18433848023414612,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2146785318851471,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01843384765088558,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01843384765088558,
|
|
"signal/format_reward/centered_abs_mean": 0.02930230051279068,
|
|
"signal/format_reward/group_std_mean": 0.051526063680648805,
|
|
"signal/format_reward/group_zero_std_frac": 0.8,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01465115025639534,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01465115025639534,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012397329090163111,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00201979277189821,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.549666176288156e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.549666176288156e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04581320658326149,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06267823949456215,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004581320798024535,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004581320798024535,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0961960181593895,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.18250569701194763,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.361111119389534,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.009619602188467979,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.009619602188467979,
|
|
"signal/volume_coverage_0/centered_abs_mean": 6.0636567766891855e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 7.854496608672434e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.830555546283722,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.063656410315588e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 6.063656410315588e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 6.0636567766891855e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 7.854496608672434e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.830555546283722,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.063656410315588e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 6.063656410315588e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 8.75501608987861e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 1.1354882900604934e-08,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.830555546283722,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.755016323025444e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 8.755016323025444e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 1.7241145222612887e-08,
|
|
"signal/volume_coverage_15/group_std_mean": 2.2463165993968914e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.830555546283722,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.7241145178203965e-09,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.7241145178203965e-09,
|
|
"signal/volume_coverage_20/centered_abs_mean": 3.248467468353056e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 4.19108733495932e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.7638888835906983,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.2484676504296315e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 3.2484676504296315e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 7.086284341539794e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 9.157187932373745e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.7305555582046509,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 7.086284181667679e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 7.086284181667679e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 6.0636567766891855e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 7.854496608672434e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.830555546283722,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 6.063656410315588e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 6.063656410315588e-10,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30536688998434725,
|
|
"calibration/batch_distribution_entropy": 0.6904094763169336,
|
|
"calibration/buffer_distribution_entropy": 0.5629806669593707,
|
|
"calibration/confidence_entropy": 0.5380215214731939,
|
|
"calibration/coverage@0%": 0.005221932114882507,
|
|
"calibration/coverage@1%": 0.005221932114882507,
|
|
"calibration/coverage@10%": 0.006266318537859007,
|
|
"calibration/coverage@15%": 0.037655678347150126,
|
|
"calibration/coverage@20%": 0.1277168750056618,
|
|
"calibration/coverage@25%": 0.3079806315332236,
|
|
"calibration/coverage@30%": 0.35630949733989137,
|
|
"calibration/coverage@5%": 0.005221932114882507,
|
|
"calibration/ece": 0.11611571468320019,
|
|
"calibration/mean_confidence": 0.7227581189811529,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01640625,
|
|
"completions/max_length": 3681.4,
|
|
"completions/max_terminated_length": 3681.4,
|
|
"completions/mean_length": 638.3761352539062,
|
|
"completions/mean_terminated_length": 649.0361328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.4,
|
|
"epoch": 0.09599880001499982,
|
|
"grad_norm": 0.0004877288010902703,
|
|
"learning_rate": 4.761904761904762e-06,
|
|
"loss": -0.0111,
|
|
"num_tokens": 74856517.0,
|
|
"reward": 0.8764391064643859,
|
|
"reward_std": 0.17399394512176514,
|
|
"rewards/accuracy_reward": 0.6493923544883728,
|
|
"rewards/brier_reward": 0.7531128406524659,
|
|
"rewards/confidence_uniqueness_reward": 0.723856520652771,
|
|
"rewards/format_reward": 0.9821180462837219,
|
|
"rewards/frontier_aurc_reward": -0.002503009606152773,
|
|
"rewards/frontier_ece_reward": 0.00990740694105625,
|
|
"rewards/frontier_entropy_batch_reward": -0.8797252178192139,
|
|
"rewards/volume_coverage_0": -9.722430145686634e-10,
|
|
"rewards/volume_coverage_1": -9.722430145686634e-10,
|
|
"rewards/volume_coverage_10": -1.6528835806994823e-09,
|
|
"rewards/volume_coverage_15": -2.086966972153492e-09,
|
|
"rewards/volume_coverage_20": -5.745023112790903e-09,
|
|
"rewards/volume_coverage_25": -1.698299545438431e-08,
|
|
"rewards/volume_coverage_5": -1.0142617808128752e-09,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19512261152267457,
|
|
"signal/accuracy_reward/group_std_mean": 0.25884974002838135,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.272222226858139,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09756130576133729,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09756130576133729,
|
|
"signal/advantage_abs_mean": 0.12614074647426604,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12614074647426604,
|
|
"signal/advantage_pre_scale_std": 0.19753454029560089,
|
|
"signal/advantage_std": 0.19753454029560089,
|
|
"signal/brier_reward/centered_abs_mean": 0.13642587661743164,
|
|
"signal/brier_reward/group_std_mean": 0.17877306342124938,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013642588630318642,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013642588630318642,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.13335272669792175,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.16444715857505798,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013335273042321205,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013335273042321205,
|
|
"signal/format_reward/centered_abs_mean": 0.030859375,
|
|
"signal/format_reward/group_std_mean": 0.05699694380164146,
|
|
"signal/format_reward/group_zero_std_frac": 0.7722222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0154296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0154296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00158603445161134,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002575516002252698,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9825430717901328e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9825430717901328e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04253996312618256,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06416215375065804,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004253996396437287,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004253996396437287,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19839468747377395,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32432641088962555,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.08333333693444729,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019839468784630297,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019839468784630297,
|
|
"signal/volume_coverage_0/centered_abs_mean": 5.006984452743391e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 6.621276663065778e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.8722222208976745,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.006984546591931e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 5.006984546591931e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 5.006984452743391e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 6.621276663065778e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.8722222208976745,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.006984546591931e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 5.006984546591931e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 1.854433347228679e-08,
|
|
"signal/volume_coverage_10/group_std_mean": 2.3904849991152056e-08,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.8722222208976745,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.8544333788179934e-09,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.8544333788179934e-09,
|
|
"signal/volume_coverage_15/centered_abs_mean": 2.2667969682560462e-08,
|
|
"signal/volume_coverage_15/group_std_mean": 2.9268863091513885e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.8527777671813965,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.2667969193541916e-09,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.2667969193541916e-09,
|
|
"signal/volume_coverage_20/centered_abs_mean": 5.436813834691634e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 7.13955294884272e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.6250000059604645,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 5.436813643733273e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 5.436813643733273e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 9.411375607726314e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 1.2240776126049013e-07,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.5611111104488373,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 9.411375756496199e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 9.411375756496199e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 1.1767533551060306e-08,
|
|
"signal/volume_coverage_5/group_std_mean": 1.5185777238468924e-08,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.8722222208976745,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.1767534000180213e-09,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.1767534000180213e-09,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20959151987755686,
|
|
"calibration/batch_distribution_entropy": 0.8048497108063604,
|
|
"calibration/buffer_distribution_entropy": 0.610253581159669,
|
|
"calibration/confidence_entropy": 0.5086275835583615,
|
|
"calibration/coverage@0%": 0.01462140992167102,
|
|
"calibration/coverage@1%": 0.01462140992167102,
|
|
"calibration/coverage@10%": 0.06981299726511095,
|
|
"calibration/coverage@15%": 0.17435541676288793,
|
|
"calibration/coverage@20%": 0.4382758104297634,
|
|
"calibration/coverage@25%": 0.7764967981818407,
|
|
"calibration/coverage@30%": 0.969482288828338,
|
|
"calibration/coverage@5%": 0.03694196825577507,
|
|
"calibration/ece": 0.08464355289125539,
|
|
"calibration/mean_confidence": 0.711897336887368,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.016232638888888883,
|
|
"completions/max_length": 3790.4,
|
|
"completions/max_terminated_length": 3790.4,
|
|
"completions/mean_length": 671.12412109375,
|
|
"completions/mean_terminated_length": 682.1524047851562,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 191.6,
|
|
"epoch": 0.1079986500168748,
|
|
"grad_norm": 0.0004558164218906313,
|
|
"learning_rate": 4.909638554216868e-06,
|
|
"loss": -0.0118,
|
|
"num_tokens": 85723131.0,
|
|
"reward": 0.9220384001731873,
|
|
"reward_std": 0.17496635913848876,
|
|
"rewards/accuracy_reward": 0.6446180462837219,
|
|
"rewards/brier_reward": 0.7602893829345703,
|
|
"rewards/confidence_uniqueness_reward": 0.890224039554596,
|
|
"rewards/format_reward": 0.9828125,
|
|
"rewards/frontier_aurc_reward": -0.0022315266309306026,
|
|
"rewards/frontier_ece_reward": 0.007368552498519421,
|
|
"rewards/frontier_entropy_batch_reward": -0.5743717849254608,
|
|
"rewards/volume_coverage_0": -4.818908792836307e-10,
|
|
"rewards/volume_coverage_1": -4.818908792836307e-10,
|
|
"rewards/volume_coverage_10": -7.52855996294377e-10,
|
|
"rewards/volume_coverage_15": -9.36817042690441e-10,
|
|
"rewards/volume_coverage_20": -4.016447382321209e-09,
|
|
"rewards/volume_coverage_25": -8.517154528875359e-09,
|
|
"rewards/volume_coverage_5": -4.818908792836307e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19286024272441865,
|
|
"signal/accuracy_reward/group_std_mean": 0.2544838279485703,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.27500000298023225,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09643012136220933,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09643012136220933,
|
|
"signal/advantage_abs_mean": 0.13153678625822068,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13153678625822068,
|
|
"signal/advantage_pre_scale_std": 0.19726994037628173,
|
|
"signal/advantage_std": 0.19726994037628173,
|
|
"signal/brier_reward/centered_abs_mean": 0.15091899931430816,
|
|
"signal/brier_reward/group_std_mean": 0.19479366540908813,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015091900154948235,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015091900154948235,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07453014776110649,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09904419332742691,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007453015027567744,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007453015027567744,
|
|
"signal/format_reward/centered_abs_mean": 0.02777777723968029,
|
|
"signal/format_reward/group_std_mean": 0.04943772032856941,
|
|
"signal/format_reward/group_zero_std_frac": 0.8055555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013888888619840145,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013888888619840145,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021528689889237286,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003593483520671725,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6910861197393388e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6910861197393388e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.055251818150281906,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08745218813419342,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0055251818150281904,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0055251818150281904,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4188727140426636,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4806748628616333,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04188727214932442,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04188727214932442,
|
|
"signal/volume_coverage_0/centered_abs_mean": 2.894947714882079e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 4.090193839179079e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9194444417953491,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.894947746800991e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.894947746800991e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 2.894947714882079e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 4.090193839179079e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9194444417953491,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.894947746800991e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.894947746800991e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 5.0805231688322294e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 7.21450300611437e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.9194444417953491,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 5.080523245160062e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 5.080523245160062e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 8.006840668262072e-09,
|
|
"signal/volume_coverage_15/group_std_mean": 1.1326063742433946e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.8388888835906982,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 8.006841072105697e-10,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 8.006841072105697e-10,
|
|
"signal/volume_coverage_20/centered_abs_mean": 1.9538272122421318e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 2.731816258760844e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.7027777850627899,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.9538273103580917e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.9538273103580917e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 3.664357803856433e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 5.045283835158898e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.6305555552244186,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.6643578893436056e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 3.6643578893436056e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 2.894947714882079e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 4.090193839179079e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9194444417953491,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.894947746800991e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.894947746800991e-10,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.43461842487265534,
|
|
"calibration/batch_distribution_entropy": 0.9674538610560927,
|
|
"calibration/buffer_distribution_entropy": 0.6754656669440455,
|
|
"calibration/confidence_entropy": 0.5427756092593146,
|
|
"calibration/coverage@0%": 0.0010666666666666667,
|
|
"calibration/coverage@1%": 0.0010666666666666667,
|
|
"calibration/coverage@10%": 0.0010666666666666667,
|
|
"calibration/coverage@15%": 0.020158585377814314,
|
|
"calibration/coverage@20%": 0.02236244212712561,
|
|
"calibration/coverage@25%": 0.06972652541040605,
|
|
"calibration/coverage@30%": 0.24491702153885683,
|
|
"calibration/coverage@5%": 0.0010666666666666667,
|
|
"calibration/ece": 0.19636142539546436,
|
|
"calibration/mean_confidence": 0.5191269183857361,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014409722222222188,
|
|
"completions/max_length": 3298.4,
|
|
"completions/max_terminated_length": 3298.4,
|
|
"completions/mean_length": 676.2962646484375,
|
|
"completions/mean_terminated_length": 686.1054077148438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 165.4,
|
|
"epoch": 0.11999850001874976,
|
|
"grad_norm": 0.0005109410267323256,
|
|
"learning_rate": 4.759036144578314e-06,
|
|
"loss": -0.0114,
|
|
"num_tokens": 96611664.0,
|
|
"reward": 0.9440282225608826,
|
|
"reward_std": 0.16083419620990752,
|
|
"rewards/accuracy_reward": 0.6298611164093018,
|
|
"rewards/brier_reward": 0.7107447504997253,
|
|
"rewards/confidence_uniqueness_reward": 0.936470878124237,
|
|
"rewards/format_reward": 0.9849826335906983,
|
|
"rewards/frontier_aurc_reward": -0.0021832690108567476,
|
|
"rewards/frontier_ece_reward": -0.00903816195204854,
|
|
"rewards/frontier_entropy_batch_reward": -0.2718408614397049,
|
|
"rewards/volume_coverage_0": -5.804163025202591e-10,
|
|
"rewards/volume_coverage_1": -5.804163025202591e-10,
|
|
"rewards/volume_coverage_10": -6.20904803960709e-10,
|
|
"rewards/volume_coverage_15": -8.302681847593973e-10,
|
|
"rewards/volume_coverage_20": -1.2872351011494753e-09,
|
|
"rewards/volume_coverage_25": -6.472662150702746e-09,
|
|
"rewards/volume_coverage_5": -5.804163025202591e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18740234375,
|
|
"signal/accuracy_reward/group_std_mean": 0.24431885480880738,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.32222222685813906,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.093701171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.093701171875,
|
|
"signal/advantage_abs_mean": 0.12112562209367753,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12112562209367753,
|
|
"signal/advantage_pre_scale_std": 0.18272640705108642,
|
|
"signal/advantage_std": 0.18272640705108642,
|
|
"signal/brier_reward/centered_abs_mean": 0.20210157334804535,
|
|
"signal/brier_reward/group_std_mean": 0.2504777073860168,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020210156962275506,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020210156962275506,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03604005612432957,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05931617692112923,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036040056031197308,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036040056031197308,
|
|
"signal/format_reward/centered_abs_mean": 0.024745008908212185,
|
|
"signal/format_reward/group_std_mean": 0.04588761366903782,
|
|
"signal/format_reward/group_zero_std_frac": 0.8111111164093018,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012372504454106092,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012372504454106092,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016047300770878792,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0027223533019423486,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0059126836713403e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0059126836713403e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06495674103498458,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.10239707678556442,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006495674047619105,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006495674047619105,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35456995368003846,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.42566134333610534,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035456997156143186,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035456997156143186,
|
|
"signal/volume_coverage_0/centered_abs_mean": 7.521659972642424e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 9.438697748387348e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9111111044883728,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.521660093379179e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 7.521660093379179e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 7.521659972642424e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 9.438697748387348e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9111111044883728,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.521660093379179e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 7.521660093379179e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 8.372495108321943e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 1.0491685134561558e-08,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.9111111044883728,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.372495495512222e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 8.372495495512222e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 1.696994389455142e-08,
|
|
"signal/volume_coverage_15/group_std_mean": 2.1219557444496218e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.8777777671813964,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.6969943616995664e-09,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.6969943616995664e-09,
|
|
"signal/volume_coverage_20/centered_abs_mean": 3.173251870158822e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 3.9903456272583074e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.7833333373069763,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.1732520250349337e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 3.1732520250349337e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 1.1645886166533614e-07,
|
|
"signal/volume_coverage_25/group_std_mean": 1.4611043983592253e-07,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.600000011920929,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.1645886754951817e-08,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.1645886754951817e-08,
|
|
"signal/volume_coverage_5/centered_abs_mean": 7.521659972642424e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 9.438697748387348e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9111111044883728,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.521660093379179e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 7.521660093379179e-10,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.11999850001874976,
|
|
"eval_calibration/aurc": 0.26401178674977815,
|
|
"eval_calibration/batch_distribution_entropy": 0.8917495100142864,
|
|
"eval_calibration/buffer_distribution_entropy": 0.7278190128157204,
|
|
"eval_calibration/confidence_entropy": 0.5585034578569301,
|
|
"eval_calibration/coverage@0%": 0.10601478494623656,
|
|
"eval_calibration/coverage@1%": 0.10601478494623656,
|
|
"eval_calibration/coverage@10%": 0.1483534946236559,
|
|
"eval_calibration/coverage@15%": 0.2056451612903226,
|
|
"eval_calibration/coverage@20%": 0.41431451612903225,
|
|
"eval_calibration/coverage@25%": 0.712029569892473,
|
|
"eval_calibration/coverage@30%": 0.8333333333333334,
|
|
"eval_calibration/coverage@5%": 0.10601478494623656,
|
|
"eval_calibration/ece": 0.3042033713434902,
|
|
"eval_calibration/mean_confidence": 0.4784197317315613,
|
|
"eval_completions/clipped_ratio": 0.015625,
|
|
"eval_completions/max_length": 2400.8333333333335,
|
|
"eval_completions/max_terminated_length": 2400.8333333333335,
|
|
"eval_completions/mean_length": 665.2201538085938,
|
|
"eval_completions/mean_terminated_length": 675.7472432454427,
|
|
"eval_completions/min_length": 52.5,
|
|
"eval_completions/min_terminated_length": 213.66666666666666,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 96611664.0,
|
|
"eval_reward": 0.8650682667891184,
|
|
"eval_reward_std": 0.25732239087422687,
|
|
"eval_rewards/accuracy_reward": 0.629340281089147,
|
|
"eval_rewards/brier_reward": 0.6964937647183737,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8830659290154775,
|
|
"eval_rewards/format_reward": 0.9835069477558136,
|
|
"eval_rewards/frontier_aurc_reward": -0.002074420607338349,
|
|
"eval_rewards/frontier_ece_reward": -0.009346982141626844,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9835069477558136,
|
|
"eval_rewards/volume_coverage_0": -1.5854292647240105e-09,
|
|
"eval_rewards/volume_coverage_1": -1.5854292647240105e-09,
|
|
"eval_rewards/volume_coverage_10": -2.291667607993523e-09,
|
|
"eval_rewards/volume_coverage_15": -3.071365599349729e-09,
|
|
"eval_rewards/volume_coverage_20": -4.06165686500648e-09,
|
|
"eval_rewards/volume_coverage_25": -9.001347433627386e-09,
|
|
"eval_rewards/volume_coverage_5": -1.5854292647240105e-09,
|
|
"eval_runtime": 206.62,
|
|
"eval_samples_per_second": 4.84,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4482964376608531,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.48037030796209973,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22414821883042654,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22414821883042654,
|
|
"eval_signal/advantage_abs_mean": 0.22970441232124963,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.22970441232124963,
|
|
"eval_signal/advantage_pre_scale_std": 0.2550656571984291,
|
|
"eval_signal/advantage_std": 0.2550656571984291,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.22326942533254623,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2752516021331151,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022326942533254623,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.022326942533254623,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.058278885980447136,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.10321150409678619,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005827888535956542,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005827888535956542,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.031521267568071686,
|
|
"eval_signal/format_reward/group_std_mean": 0.08134117722511292,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.5833333532015482,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.015760633784035843,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.015760633784035843,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0012940190790686756,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0025426297021719315,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6175238215510035e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6175238215510035e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.06478989496827126,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.10126168405016263,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0064789894968271255,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0064789894968271255,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.031521267568071686,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.08134117722511292,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.5833333532015482,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0031521269120275974,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0031521269120275974,
|
|
"eval_signal/volume_coverage_0/centered_abs_mean": 6.963701263777248e-09,
|
|
"eval_signal/volume_coverage_0/group_std_mean": 9.221368557632559e-09,
|
|
"eval_signal/volume_coverage_0/group_zero_std_frac": 0.7500000298023224,
|
|
"eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.963701444188489e-10,
|
|
"eval_signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_0/weighted_centered_abs_mean": 6.963701444188489e-10,
|
|
"eval_signal/volume_coverage_1/centered_abs_mean": 6.963701263777248e-09,
|
|
"eval_signal/volume_coverage_1/group_std_mean": 9.221368557632559e-09,
|
|
"eval_signal/volume_coverage_1/group_zero_std_frac": 0.7500000298023224,
|
|
"eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.963701444188489e-10,
|
|
"eval_signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_1/weighted_centered_abs_mean": 6.963701444188489e-10,
|
|
"eval_signal/volume_coverage_10/centered_abs_mean": 1.4617537217690805e-08,
|
|
"eval_signal/volume_coverage_10/group_std_mean": 1.9474527879204118e-08,
|
|
"eval_signal/volume_coverage_10/group_zero_std_frac": 0.6944444676240286,
|
|
"eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.46175369956462e-09,
|
|
"eval_signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_10/weighted_centered_abs_mean": 1.46175369956462e-09,
|
|
"eval_signal/volume_coverage_15/centered_abs_mean": 2.0035404409609232e-08,
|
|
"eval_signal/volume_coverage_15/group_std_mean": 2.7039340803428995e-08,
|
|
"eval_signal/volume_coverage_15/group_zero_std_frac": 0.6944444676240286,
|
|
"eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.0035404076542327e-09,
|
|
"eval_signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_15/weighted_centered_abs_mean": 2.0035404076542327e-09,
|
|
"eval_signal/volume_coverage_20/centered_abs_mean": 2.6633061052846092e-08,
|
|
"eval_signal/volume_coverage_20/group_std_mean": 3.595280454830411e-08,
|
|
"eval_signal/volume_coverage_20/group_zero_std_frac": 0.6666666865348816,
|
|
"eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.6633059461526423e-09,
|
|
"eval_signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_20/weighted_centered_abs_mean": 2.6633059461526423e-09,
|
|
"eval_signal/volume_coverage_25/centered_abs_mean": 4.514647552097036e-08,
|
|
"eval_signal/volume_coverage_25/group_std_mean": 6.072574250855685e-08,
|
|
"eval_signal/volume_coverage_25/group_zero_std_frac": 0.5833333432674408,
|
|
"eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 4.514647766740154e-09,
|
|
"eval_signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_25/weighted_centered_abs_mean": 4.514647766740154e-09,
|
|
"eval_signal/volume_coverage_5/centered_abs_mean": 6.963701263777248e-09,
|
|
"eval_signal/volume_coverage_5/group_std_mean": 9.221368557632559e-09,
|
|
"eval_signal/volume_coverage_5/group_zero_std_frac": 0.7500000298023224,
|
|
"eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 6.963701444188489e-10,
|
|
"eval_signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_5/weighted_centered_abs_mean": 6.963701444188489e-10,
|
|
"eval_steps_per_second": 0.029,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29729865103465236,
|
|
"calibration/batch_distribution_entropy": 0.9859283937759169,
|
|
"calibration/buffer_distribution_entropy": 0.7551744033764768,
|
|
"calibration/confidence_entropy": 0.5197851793470234,
|
|
"calibration/coverage@0%": 0.00996825658572783,
|
|
"calibration/coverage@1%": 0.00996825658572783,
|
|
"calibration/coverage@10%": 0.057737285457118905,
|
|
"calibration/coverage@15%": 0.2131178628849404,
|
|
"calibration/coverage@20%": 0.2545876791579063,
|
|
"calibration/coverage@25%": 0.3962292671531242,
|
|
"calibration/coverage@30%": 0.5168830431759815,
|
|
"calibration/coverage@5%": 0.01679240356735513,
|
|
"calibration/ece": 0.24835822174264982,
|
|
"calibration/mean_confidence": 0.50909457244184,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015538194444444441,
|
|
"completions/max_length": 3740.6,
|
|
"completions/max_terminated_length": 3740.6,
|
|
"completions/mean_length": 693.9699584960938,
|
|
"completions/mean_terminated_length": 705.0622924804687,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 171.8,
|
|
"epoch": 0.13199835002062474,
|
|
"grad_norm": 0.00044327336945571005,
|
|
"learning_rate": 4.60843373493976e-06,
|
|
"loss": -0.0124,
|
|
"num_tokens": 107686774.0,
|
|
"reward": 0.9535149216651917,
|
|
"reward_std": 0.16562986373901367,
|
|
"rewards/accuracy_reward": 0.643750011920929,
|
|
"rewards/brier_reward": 0.6915101885795594,
|
|
"rewards/confidence_uniqueness_reward": 0.93819739818573,
|
|
"rewards/format_reward": 0.9842013835906982,
|
|
"rewards/frontier_aurc_reward": -0.002180169289931655,
|
|
"rewards/frontier_ece_reward": -0.010109073109924793,
|
|
"rewards/frontier_entropy_batch_reward": -0.2239334464073181,
|
|
"rewards/volume_coverage_0": -3.921182398225475e-09,
|
|
"rewards/volume_coverage_1": -3.921182398225475e-09,
|
|
"rewards/volume_coverage_10": -5.6717894131308455e-09,
|
|
"rewards/volume_coverage_15": -7.224053330534552e-09,
|
|
"rewards/volume_coverage_20": -3.3456322601210786e-08,
|
|
"rewards/volume_coverage_25": -1.1800991570609653e-07,
|
|
"rewards/volume_coverage_5": -3.921182398225475e-09,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19537760615348815,
|
|
"signal/accuracy_reward/group_std_mean": 0.25658329427242277,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2805555611848831,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09768880307674407,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09768880307674407,
|
|
"signal/advantage_abs_mean": 0.12394773364067077,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12394773364067077,
|
|
"signal/advantage_pre_scale_std": 0.18684935569763184,
|
|
"signal/advantage_std": 0.18684935569763184,
|
|
"signal/brier_reward/centered_abs_mean": 0.22628356218338014,
|
|
"signal/brier_reward/group_std_mean": 0.27401196360588076,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022628356888890266,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022628356888890266,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03589446097612381,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06275491267442704,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003589446283876896,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003589446283876896,
|
|
"signal/format_reward/centered_abs_mean": 0.0267686627805233,
|
|
"signal/format_reward/group_std_mean": 0.052131906151771545,
|
|
"signal/format_reward/group_zero_std_frac": 0.7750000119209289,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01338433139026165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01338433139026165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001548130135051906,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0025685901287943124,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9351627634023318e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9351627634023318e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06824411302804947,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0973386213183403,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006824411358684302,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006824411358684302,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31762467622756957,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39661539196968076,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031762467697262764,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031762467697262764,
|
|
"signal/volume_coverage_0/centered_abs_mean": 8.444079380165448e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 1.0913937664724927e-08,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.7444444537162781,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.444079213631995e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 8.444079213631995e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 8.444079380165448e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 1.0913937664724927e-08,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.7444444537162781,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.444079213631995e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 8.444079213631995e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 1.4421499627825796e-08,
|
|
"signal/volume_coverage_10/group_std_mean": 1.8671647872281484e-08,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.7111111164093018,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.4421500160732848e-09,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.4421500160732848e-09,
|
|
"signal/volume_coverage_15/centered_abs_mean": 2.36699455480327e-08,
|
|
"signal/volume_coverage_15/group_std_mean": 3.06581595665989e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.7027777910232544,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.3669946613846805e-09,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.3669946613846805e-09,
|
|
"signal/volume_coverage_20/centered_abs_mean": 6.965113588108807e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 8.979286612031955e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.5333333373069763,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 6.9651132639236835e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 6.9651132639236835e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 2.0221143959275878e-07,
|
|
"signal/volume_coverage_25/group_std_mean": 2.605379972209221e-07,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.4694444477558136,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.022114535815689e-08,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 2.022114535815689e-08,
|
|
"signal/volume_coverage_5/centered_abs_mean": 8.444079380165448e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 1.0913937664724927e-08,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.7444444537162781,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 8.444079213631995e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 8.444079213631995e-10,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3616251978595281,
|
|
"calibration/batch_distribution_entropy": 0.9636990149237773,
|
|
"calibration/buffer_distribution_entropy": 0.7942048852247912,
|
|
"calibration/confidence_entropy": 0.48655814763306565,
|
|
"calibration/coverage@0%": 0.017837015808552077,
|
|
"calibration/coverage@1%": 0.017837015808552077,
|
|
"calibration/coverage@10%": 0.05555821654347036,
|
|
"calibration/coverage@15%": 0.18383875640449895,
|
|
"calibration/coverage@20%": 0.2142845849848963,
|
|
"calibration/coverage@25%": 0.29187982363828147,
|
|
"calibration/coverage@30%": 0.4164518759879103,
|
|
"calibration/coverage@5%": 0.02254905769336883,
|
|
"calibration/ece": 0.23523124663164277,
|
|
"calibration/mean_confidence": 0.6070413395957835,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.018836805555555537,
|
|
"completions/max_length": 3748.8,
|
|
"completions/max_terminated_length": 3748.8,
|
|
"completions/mean_length": 731.7283813476563,
|
|
"completions/mean_terminated_length": 745.8790405273437,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 200.0,
|
|
"epoch": 0.14399820002249972,
|
|
"grad_norm": 0.0004057070182170719,
|
|
"learning_rate": 4.457831325301205e-06,
|
|
"loss": -0.0151,
|
|
"num_tokens": 119212861.0,
|
|
"reward": 0.9394213914871216,
|
|
"reward_std": 0.1709626942873001,
|
|
"rewards/accuracy_reward": 0.6255208373069763,
|
|
"rewards/brier_reward": 0.7058334589004517,
|
|
"rewards/confidence_uniqueness_reward": 0.9315223217010498,
|
|
"rewards/format_reward": 0.98046875,
|
|
"rewards/frontier_aurc_reward": -0.0025447321124374865,
|
|
"rewards/frontier_ece_reward": 0.0011185122653841971,
|
|
"rewards/frontier_entropy_batch_reward": -0.27389044165611265,
|
|
"rewards/volume_coverage_0": -4.0031589154665426e-11,
|
|
"rewards/volume_coverage_1": -4.0031589154665426e-11,
|
|
"rewards/volume_coverage_10": -4.686928956054714e-10,
|
|
"rewards/volume_coverage_15": 7.733450324565183e-10,
|
|
"rewards/volume_coverage_20": -1.1667458288400211e-08,
|
|
"rewards/volume_coverage_25": -1.7245495342876894e-08,
|
|
"rewards/volume_coverage_5": -4.0031589154665426e-11,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19491102397441865,
|
|
"signal/accuracy_reward/group_std_mean": 0.2536593437194824,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09745551198720932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09745551198720932,
|
|
"signal/advantage_abs_mean": 0.12908945828676224,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12908945828676224,
|
|
"signal/advantage_pre_scale_std": 0.1949590265750885,
|
|
"signal/advantage_std": 0.1949590265750885,
|
|
"signal/brier_reward/centered_abs_mean": 0.22423318326473235,
|
|
"signal/brier_reward/group_std_mean": 0.27205477356910707,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022423317655920982,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022423317655920982,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0429903544485569,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06899664849042893,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004299035528674722,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004299035528674722,
|
|
"signal/format_reward/centered_abs_mean": 0.03196072056889534,
|
|
"signal/format_reward/group_std_mean": 0.05609421357512474,
|
|
"signal/format_reward/group_zero_std_frac": 0.7805555582046508,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01598036028444767,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01598036028444767,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023376439232379196,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0036471809260547163,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.922054991358891e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.922054991358891e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06132573038339615,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08886844366788864,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00613257298246026,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00613257298246026,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35195544362068176,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4247310280799866,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0351955458521843,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0351955458521843,
|
|
"signal/volume_coverage_0/centered_abs_mean": 7.865199025403058e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 1.0264956484817845e-08,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.7833333373069763,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.865199214140973e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 7.865199214140973e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 7.865199025403058e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 1.0264956484817845e-08,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.7833333373069763,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.865199214140973e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 7.865199214140973e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 1.0155620655538655e-08,
|
|
"signal/volume_coverage_10/group_std_mean": 1.3240603413677832e-08,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.7833333373069763,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.0155620733254266e-09,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.0155620733254266e-09,
|
|
"signal/volume_coverage_15/centered_abs_mean": 2.9428355929894678e-08,
|
|
"signal/volume_coverage_15/group_std_mean": 3.860327986160428e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.5583333432674408,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.9428356862482017e-09,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.9428356862482017e-09,
|
|
"signal/volume_coverage_20/centered_abs_mean": 8.900074135453906e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 1.1687629637435748e-07,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.4083333432674408,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 8.900074099926769e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 8.900074099926769e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 1.9333558469725177e-07,
|
|
"signal/volume_coverage_25/group_std_mean": 2.5326322656837873e-07,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.336111119389534,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.9333558576306587e-08,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.9333558576306587e-08,
|
|
"signal/volume_coverage_5/centered_abs_mean": 7.865199025403058e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 1.0264956484817845e-08,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.7833333373069763,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.865199214140973e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 7.865199214140973e-10,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2726635652293484,
|
|
"calibration/batch_distribution_entropy": 0.9804184648220506,
|
|
"calibration/buffer_distribution_entropy": 0.8191348651993706,
|
|
"calibration/confidence_entropy": 0.504464702619779,
|
|
"calibration/coverage@0%": 0.019694467992829885,
|
|
"calibration/coverage@1%": 0.019694467992829885,
|
|
"calibration/coverage@10%": 0.030910192193535312,
|
|
"calibration/coverage@15%": 0.20113148411819665,
|
|
"calibration/coverage@20%": 0.4828118542224125,
|
|
"calibration/coverage@25%": 0.5705832777945432,
|
|
"calibration/coverage@30%": 0.6223547628605839,
|
|
"calibration/coverage@5%": 0.025576820934006355,
|
|
"calibration/ece": 0.2045513577079625,
|
|
"calibration/mean_confidence": 0.5563930817294047,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01605902777777779,
|
|
"completions/max_length": 3807.6,
|
|
"completions/max_terminated_length": 3807.6,
|
|
"completions/mean_length": 753.4966186523437,
|
|
"completions/mean_terminated_length": 765.7596557617187,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 183.2,
|
|
"epoch": 0.1559980500243747,
|
|
"grad_norm": 0.000409733853302896,
|
|
"learning_rate": 4.307228915662651e-06,
|
|
"loss": -0.0118,
|
|
"num_tokens": 130987190.0,
|
|
"reward": 0.9614022612571717,
|
|
"reward_std": 0.1574880450963974,
|
|
"rewards/accuracy_reward": 0.6534722208976745,
|
|
"rewards/brier_reward": 0.7185048341751099,
|
|
"rewards/confidence_uniqueness_reward": 0.9371358990669251,
|
|
"rewards/format_reward": 0.983506953716278,
|
|
"rewards/frontier_aurc_reward": -0.002122586825862527,
|
|
"rewards/frontier_ece_reward": 0.0014065916649997235,
|
|
"rewards/frontier_entropy_batch_reward": -0.22765516638755798,
|
|
"rewards/volume_coverage_0": -5.068013078646238e-10,
|
|
"rewards/volume_coverage_1": -5.068013078646238e-10,
|
|
"rewards/volume_coverage_10": -9.84125698988425e-10,
|
|
"rewards/volume_coverage_15": -1.6737272082179011e-09,
|
|
"rewards/volume_coverage_20": -2.9347400509749376e-09,
|
|
"rewards/volume_coverage_25": -7.171816296391853e-09,
|
|
"rewards/volume_coverage_5": -5.068013078646238e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1733398422598839,
|
|
"signal/accuracy_reward/group_std_mean": 0.2318666011095047,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.33333333134651183,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08666992112994194,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08666992112994194,
|
|
"signal/advantage_abs_mean": 0.11658722907304764,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11658722907304764,
|
|
"signal/advantage_pre_scale_std": 0.18216001391410827,
|
|
"signal/advantage_std": 0.18216001391410827,
|
|
"signal/brier_reward/centered_abs_mean": 0.21878646910190583,
|
|
"signal/brier_reward/group_std_mean": 0.2677512466907501,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021878646686673164,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021878646686673164,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03749167211353779,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0637543372809887,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037491672672331335,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037491672672331335,
|
|
"signal/format_reward/centered_abs_mean": 0.02845052108168602,
|
|
"signal/format_reward/group_std_mean": 0.05323779508471489,
|
|
"signal/format_reward/group_zero_std_frac": 0.7833333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01422526054084301,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01422526054084301,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019353487994521856,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0031194576993584635,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.419186057522893e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.419186057522893e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06133586913347244,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08665431588888169,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006133586913347244,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006133586913347244,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3109153091907501,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38727723360061644,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031091532111167906,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031091532111167906,
|
|
"signal/volume_coverage_0/centered_abs_mean": 2.2870564997390376e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 3.016555882862093e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.2870566274146854e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.2870566274146854e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 2.2870564997390376e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 3.016555882862093e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.2870566274146854e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.2870566274146854e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 8.391811423535955e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 1.086056132804103e-08,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.8583333253860473,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 8.391811218144696e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 8.391811218144696e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 1.444390749227864e-08,
|
|
"signal/volume_coverage_15/group_std_mean": 1.882116328344452e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.8083333373069763,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.4443908041839038e-09,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.4443908041839038e-09,
|
|
"signal/volume_coverage_20/centered_abs_mean": 2.9427038361617974e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 3.849100700392683e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.65,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.9427039305307544e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 2.9427039305307544e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 6.738196756117532e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 8.714540680188066e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.522222226858139,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 6.738196756117531e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 6.738196756117531e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 2.2870564997390376e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 3.016555882862093e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.2870566274146854e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.2870566274146854e-10,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3393896341269902,
|
|
"calibration/batch_distribution_entropy": 0.9831179408403127,
|
|
"calibration/buffer_distribution_entropy": 0.8426835690912935,
|
|
"calibration/confidence_entropy": 0.5218072154111429,
|
|
"calibration/coverage@0%": 0.007841300976716198,
|
|
"calibration/coverage@1%": 0.007841300976716198,
|
|
"calibration/coverage@10%": 0.00836213431004953,
|
|
"calibration/coverage@15%": 0.00836213431004953,
|
|
"calibration/coverage@20%": 0.01679714622969032,
|
|
"calibration/coverage@25%": 0.11030523827709116,
|
|
"calibration/coverage@30%": 0.3504912616795737,
|
|
"calibration/coverage@5%": 0.007841300976716198,
|
|
"calibration/ece": 0.21396587911327264,
|
|
"calibration/mean_confidence": 0.53053761128133,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01571180555555558,
|
|
"completions/max_length": 3301.0,
|
|
"completions/max_terminated_length": 3301.0,
|
|
"completions/mean_length": 747.0869018554688,
|
|
"completions/mean_terminated_length": 759.0447265625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 180.6,
|
|
"epoch": 0.16799790002624967,
|
|
"grad_norm": 0.00042153653339482844,
|
|
"learning_rate": 4.156626506024097e-06,
|
|
"loss": -0.0122,
|
|
"num_tokens": 142671775.0,
|
|
"reward": 0.9516011714935303,
|
|
"reward_std": 0.155648335814476,
|
|
"rewards/accuracy_reward": 0.6380208373069763,
|
|
"rewards/brier_reward": 0.694776999950409,
|
|
"rewards/confidence_uniqueness_reward": 0.9385874152183533,
|
|
"rewards/format_reward": 0.9842013955116272,
|
|
"rewards/frontier_aurc_reward": -0.00205742665566504,
|
|
"rewards/frontier_ece_reward": -0.005769663273531478,
|
|
"rewards/frontier_entropy_batch_reward": -0.2224372446537018,
|
|
"rewards/volume_coverage_0": -1.3082197200531277e-10,
|
|
"rewards/volume_coverage_1": -1.3082197200531277e-10,
|
|
"rewards/volume_coverage_10": -1.3082197200531277e-10,
|
|
"rewards/volume_coverage_15": -9.936441391911188e-10,
|
|
"rewards/volume_coverage_20": -1.2587668718477829e-08,
|
|
"rewards/volume_coverage_25": -1.5884197990256867e-08,
|
|
"rewards/volume_coverage_5": -1.3082197200531277e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17438151240348815,
|
|
"signal/accuracy_reward/group_std_mean": 0.22899937331676484,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.35000000298023226,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08719075620174407,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08719075620174407,
|
|
"signal/advantage_abs_mean": 0.11629650443792343,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11629650443792343,
|
|
"signal/advantage_pre_scale_std": 0.17956087589263917,
|
|
"signal/advantage_std": 0.17956087589263917,
|
|
"signal/brier_reward/centered_abs_mean": 0.22307340502738954,
|
|
"signal/brier_reward/group_std_mean": 0.27035006880760193,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022307340800762177,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022307340800762177,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03593003079295158,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05953029617667198,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003593003237619996,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003593003237619996,
|
|
"signal/format_reward/centered_abs_mean": 0.02693142332136631,
|
|
"signal/format_reward/group_std_mean": 0.04885709583759308,
|
|
"signal/format_reward/group_zero_std_frac": 0.8055555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013465711660683156,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013465711660683156,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016406909562647343,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002687893947586417,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.050863695330918e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.050863695330918e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06620457619428635,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09011965543031693,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006620457675307989,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006620457675307989,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.312404465675354,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39026838541030884,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031240447983145715,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031240447983145715,
|
|
"signal/volume_coverage_0/centered_abs_mean": 6.527382523380254e-10,
|
|
"signal/volume_coverage_0/group_std_mean": 8.340057505717979e-10,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9944444417953491,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.527382211130029e-11,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 6.527382211130029e-11,
|
|
"signal/volume_coverage_1/centered_abs_mean": 6.527382523380254e-10,
|
|
"signal/volume_coverage_1/group_std_mean": 8.340057505717979e-10,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9944444417953491,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.527382211130029e-11,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 6.527382211130029e-11,
|
|
"signal/volume_coverage_10/centered_abs_mean": 6.527382523380254e-10,
|
|
"signal/volume_coverage_10/group_std_mean": 8.340057505717979e-10,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.9944444417953491,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 6.527382211130029e-11,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 6.527382211130029e-11,
|
|
"signal/volume_coverage_15/centered_abs_mean": 2.0081497686952334e-09,
|
|
"signal/volume_coverage_15/group_std_mean": 2.5946954695932335e-09,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.9277777671813965,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.0081497367763212e-10,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.0081497367763212e-10,
|
|
"signal/volume_coverage_20/centered_abs_mean": 1.9240841941936536e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 2.483854966151e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.7888888835906982,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.924084341853316e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.924084341853316e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 2.63578932013786e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 3.40409355814586e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.75,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.635789275728939e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 2.635789275728939e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 6.527382523380254e-10,
|
|
"signal/volume_coverage_5/group_std_mean": 8.340057505717979e-10,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9944444417953491,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 6.527382211130029e-11,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 6.527382211130029e-11,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26635782488515874,
|
|
"calibration/batch_distribution_entropy": 0.9667058065508993,
|
|
"calibration/buffer_distribution_entropy": 0.8619489597654877,
|
|
"calibration/confidence_entropy": 0.5343618937372703,
|
|
"calibration/coverage@0%": 0.007416344462318554,
|
|
"calibration/coverage@1%": 0.007416344462318554,
|
|
"calibration/coverage@10%": 0.13569970920759128,
|
|
"calibration/coverage@15%": 0.24482519000498995,
|
|
"calibration/coverage@20%": 0.34556386311354187,
|
|
"calibration/coverage@25%": 0.42643206444033765,
|
|
"calibration/coverage@30%": 0.6321881541722456,
|
|
"calibration/coverage@5%": 0.04180787885385294,
|
|
"calibration/ece": 0.21680189415961587,
|
|
"calibration/mean_confidence": 0.5755003843171993,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014670138888888884,
|
|
"completions/max_length": 3507.6,
|
|
"completions/max_terminated_length": 3507.6,
|
|
"completions/mean_length": 736.2677124023437,
|
|
"completions/mean_terminated_length": 747.2797119140625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 227.2,
|
|
"epoch": 0.17999775002812465,
|
|
"grad_norm": 0.0004315000551287085,
|
|
"learning_rate": 4.006024096385543e-06,
|
|
"loss": -0.0117,
|
|
"num_tokens": 154218475.0,
|
|
"reward": 0.9771409153938293,
|
|
"reward_std": 0.15745867788791656,
|
|
"rewards/accuracy_reward": 0.6884548664093018,
|
|
"rewards/brier_reward": 0.7252273917198181,
|
|
"rewards/confidence_uniqueness_reward": 0.936829400062561,
|
|
"rewards/format_reward": 0.9852430582046509,
|
|
"rewards/frontier_aurc_reward": -0.0018611573614180088,
|
|
"rewards/frontier_ece_reward": -0.003944494191091508,
|
|
"rewards/frontier_entropy_batch_reward": -0.25496000945568087,
|
|
"rewards/volume_coverage_0": -5.460884883179418e-10,
|
|
"rewards/volume_coverage_1": -5.460884883179418e-10,
|
|
"rewards/volume_coverage_10": -1.2820269373881032e-09,
|
|
"rewards/volume_coverage_15": -2.6095399480174917e-09,
|
|
"rewards/volume_coverage_20": -9.53145136151079e-09,
|
|
"rewards/volume_coverage_25": -2.2126803767008597e-08,
|
|
"rewards/volume_coverage_5": -5.460884883179418e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1789984792470932,
|
|
"signal/accuracy_reward/group_std_mean": 0.2308933675289154,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3638888895511627,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0894992396235466,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0894992396235466,
|
|
"signal/advantage_abs_mean": 0.11769915223121644,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11769915223121644,
|
|
"signal/advantage_pre_scale_std": 0.1850135773420334,
|
|
"signal/advantage_std": 0.1850135773420334,
|
|
"signal/brier_reward/centered_abs_mean": 0.20045875906944274,
|
|
"signal/brier_reward/group_std_mean": 0.24788658916950226,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020045876502990723,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020045876502990723,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.036058619245886804,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06366177275776863,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036058619152754545,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036058619152754545,
|
|
"signal/format_reward/centered_abs_mean": 0.026019965298473835,
|
|
"signal/format_reward/group_std_mean": 0.05199590064585209,
|
|
"signal/format_reward/group_zero_std_frac": 0.775,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013009982649236917,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013009982649236917,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016442745458334685,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0026633210014551877,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.055343247775454e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.055343247775454e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05721868574619293,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08083326071500778,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005721868854016066,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005721868854016066,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3330303609371185,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40677814483642577,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03330303654074669,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03330303654074669,
|
|
"signal/volume_coverage_0/centered_abs_mean": 1.969390450717867e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 2.5594340091750213e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.925,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.9693905742301788e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.9693905742301788e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 1.969390450717867e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 2.5594340091750213e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.925,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.9693905742301788e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.9693905742301788e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 4.858173541233413e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 6.3628841862062305e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.8305555582046509,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.858173523192288e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 4.858173523192288e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 1.0683860851568028e-08,
|
|
"signal/volume_coverage_15/group_std_mean": 1.3964444456338044e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.725,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.0683861344229494e-09,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.0683861344229494e-09,
|
|
"signal/volume_coverage_20/centered_abs_mean": 3.9493965964254586e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 5.174840893573673e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.6194444358348846,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.94939684289497e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 3.94939684289497e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 8.536078723864194e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 1.1200458835958215e-07,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.5416666686534881,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 8.53607891038166e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 8.53607891038166e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 1.969390450717867e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 2.5594340091750213e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.925,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.9693905742301788e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.9693905742301788e-10,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2429387059383234,
|
|
"calibration/batch_distribution_entropy": 0.9529905402102818,
|
|
"calibration/buffer_distribution_entropy": 0.8738844216235699,
|
|
"calibration/confidence_entropy": 0.5125112143308769,
|
|
"calibration/coverage@0%": 0.010054332874111489,
|
|
"calibration/coverage@1%": 0.010054332874111489,
|
|
"calibration/coverage@10%": 0.06927349297909838,
|
|
"calibration/coverage@15%": 0.3246663063262004,
|
|
"calibration/coverage@20%": 0.535180322504506,
|
|
"calibration/coverage@25%": 0.6291437787886591,
|
|
"calibration/coverage@30%": 0.6753869481875662,
|
|
"calibration/coverage@5%": 0.017928348622142987,
|
|
"calibration/ece": 0.20932305149161134,
|
|
"calibration/mean_confidence": 0.6136822693300799,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013888888888888885,
|
|
"completions/max_length": 3717.2,
|
|
"completions/max_terminated_length": 3717.2,
|
|
"completions/mean_length": 752.978662109375,
|
|
"completions/mean_terminated_length": 763.6736328125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 190.0,
|
|
"epoch": 0.19199760002999963,
|
|
"grad_norm": 0.00040160753997042775,
|
|
"learning_rate": 3.855421686746989e-06,
|
|
"loss": -0.0103,
|
|
"num_tokens": 165946069.0,
|
|
"reward": 0.9624287486076355,
|
|
"reward_std": 0.1525136023759842,
|
|
"rewards/accuracy_reward": 0.6596354246139526,
|
|
"rewards/brier_reward": 0.7311666131019592,
|
|
"rewards/confidence_uniqueness_reward": 0.9364403367042542,
|
|
"rewards/format_reward": 0.9856770873069763,
|
|
"rewards/frontier_aurc_reward": -0.0021038626320660113,
|
|
"rewards/frontier_ece_reward": 0.0012513543479144573,
|
|
"rewards/frontier_entropy_batch_reward": -0.27087019085884095,
|
|
"rewards/volume_coverage_0": -4.568020739448286e-10,
|
|
"rewards/volume_coverage_1": -4.568020739448286e-10,
|
|
"rewards/volume_coverage_10": -2.0638052045107712e-09,
|
|
"rewards/volume_coverage_15": -2.9442815886737693e-09,
|
|
"rewards/volume_coverage_20": -4.258802924206328e-09,
|
|
"rewards/volume_coverage_25": -9.412961088361981e-09,
|
|
"rewards/volume_coverage_5": -4.568020739448286e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17906358242034912,
|
|
"signal/accuracy_reward/group_std_mean": 0.23346365988254547,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.34722223281860354,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08953179121017456,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08953179121017456,
|
|
"signal/advantage_abs_mean": 0.1167010024189949,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1167010024189949,
|
|
"signal/advantage_pre_scale_std": 0.17792364954948425,
|
|
"signal/advantage_std": 0.17792364954948425,
|
|
"signal/brier_reward/centered_abs_mean": 0.192233869433403,
|
|
"signal/brier_reward/group_std_mean": 0.23862990140914916,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019223386794328688,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019223386794328688,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031697943806648254,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0505401112139225,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003169794473797083,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003169794473797083,
|
|
"signal/format_reward/centered_abs_mean": 0.02013346329331398,
|
|
"signal/format_reward/group_std_mean": 0.03664347417652607,
|
|
"signal/format_reward/group_zero_std_frac": 0.8527777791023254,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01006673164665699,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01006673164665699,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001982904877513647,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0031933929305523632,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4786311405478047e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4786311405478047e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05222774744033813,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07532420605421067,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00522277494892478,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00522277494892478,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33515734076499937,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40675837397575376,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03351573422551155,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03351573422551155,
|
|
"signal/volume_coverage_0/centered_abs_mean": 2.5033016437525645e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 3.3354313089262176e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.8972222208976746,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.503301503586908e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.503301503586908e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 2.5033016437525645e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 3.3354313089262176e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.8972222208976746,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.503301503586908e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.503301503586908e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 7.160277382123326e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 9.599346739719294e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.8472222149372101,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.160277197548748e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 7.160277197548748e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 1.2688166350915964e-08,
|
|
"signal/volume_coverage_15/group_std_mean": 1.6908503219781323e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.7361111164093017,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.2688165622332104e-09,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.2688165622332104e-09,
|
|
"signal/volume_coverage_20/centered_abs_mean": 1.812260547184863e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 2.409684691784264e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.6916666626930237,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.8122604854287071e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.8122604854287071e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 7.594725190962492e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 1.0130850087719523e-07,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.6416666686534882,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 7.594725902337895e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 7.594725902337895e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 2.5033016437525645e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 3.3354313089262176e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.8972222208976746,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.503301503586908e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.503301503586908e-10,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.272205180148172,
|
|
"calibration/batch_distribution_entropy": 0.9839264217170511,
|
|
"calibration/buffer_distribution_entropy": 0.8838308061064566,
|
|
"calibration/confidence_entropy": 0.5268860344745087,
|
|
"calibration/coverage@0%": 0.02139878392542739,
|
|
"calibration/coverage@1%": 0.02139878392542739,
|
|
"calibration/coverage@10%": 0.06428258517319922,
|
|
"calibration/coverage@15%": 0.13265800491797142,
|
|
"calibration/coverage@20%": 0.2533631740237014,
|
|
"calibration/coverage@25%": 0.3971892928952113,
|
|
"calibration/coverage@30%": 0.6274081366442885,
|
|
"calibration/coverage@5%": 0.02244045059209406,
|
|
"calibration/ece": 0.18339377635586349,
|
|
"calibration/mean_confidence": 0.5337971173985369,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009548611111111115,
|
|
"completions/max_length": 3463.0,
|
|
"completions/max_terminated_length": 3463.0,
|
|
"completions/mean_length": 743.687255859375,
|
|
"completions/mean_terminated_length": 750.9121826171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 227.8,
|
|
"epoch": 0.2039974500318746,
|
|
"grad_norm": 0.00039456840022467077,
|
|
"learning_rate": 3.7048192771084342e-06,
|
|
"loss": -0.007,
|
|
"num_tokens": 177600546.0,
|
|
"reward": 0.977763569355011,
|
|
"reward_std": 0.14530135691165924,
|
|
"rewards/accuracy_reward": 0.68359375,
|
|
"rewards/brier_reward": 0.7319249629974365,
|
|
"rewards/confidence_uniqueness_reward": 0.9425673007965087,
|
|
"rewards/format_reward": 0.9901041626930237,
|
|
"rewards/frontier_aurc_reward": -0.0018502724356949329,
|
|
"rewards/frontier_ece_reward": -0.0009264084612368606,
|
|
"rewards/frontier_entropy_batch_reward": -0.2641885936260223,
|
|
"rewards/volume_coverage_0": -1.9229808995158136e-10,
|
|
"rewards/volume_coverage_1": -1.9229808995158136e-10,
|
|
"rewards/volume_coverage_10": -4.5319727554060664e-10,
|
|
"rewards/volume_coverage_15": -1.2661274056968707e-09,
|
|
"rewards/volume_coverage_20": -4.186512286841193e-09,
|
|
"rewards/volume_coverage_25": -5.821830104113523e-09,
|
|
"rewards/volume_coverage_5": -1.9229808995158136e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16773546040058135,
|
|
"signal/accuracy_reward/group_std_mean": 0.2227248728275299,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3527777850627899,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08386773020029067,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08386773020029067,
|
|
"signal/advantage_abs_mean": 0.1090440571308136,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1090440571308136,
|
|
"signal/advantage_pre_scale_std": 0.1700405955314636,
|
|
"signal/advantage_std": 0.1700405955314636,
|
|
"signal/brier_reward/centered_abs_mean": 0.1963264286518097,
|
|
"signal/brier_reward/group_std_mean": 0.2435892939567566,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019632643461227416,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019632643461227416,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028791505470871927,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.048539139330387115,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028791506309062244,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028791506309062244,
|
|
"signal/format_reward/centered_abs_mean": 0.01761067733168602,
|
|
"signal/format_reward/group_std_mean": 0.03504730835556984,
|
|
"signal/format_reward/group_zero_std_frac": 0.8527777910232544,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00880533866584301,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00880533866584301,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001829707226715982,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0029482690151780844,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.287134120706469e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.287134120706469e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05751297697424888,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08030709475278855,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005751297902315855,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005751297902315855,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33534626960754393,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40883824825286863,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03353462740778923,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03353462740778923,
|
|
"signal/volume_coverage_0/centered_abs_mean": 8.776430915347078e-10,
|
|
"signal/volume_coverage_0/group_std_mean": 1.13681020152967e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9944444417953491,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.776430762691411e-11,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 8.776430762691411e-11,
|
|
"signal/volume_coverage_1/centered_abs_mean": 8.776430915347078e-10,
|
|
"signal/volume_coverage_1/group_std_mean": 1.13681020152967e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9944444417953491,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.776430762691411e-11,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 8.776430762691411e-11,
|
|
"signal/volume_coverage_10/centered_abs_mean": 1.6664782886977036e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 2.1400602179255657e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.9611111044883728,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.6664783095143854e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.6664783095143854e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 1.3187805891767824e-08,
|
|
"signal/volume_coverage_15/group_std_mean": 1.7327763579544352e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.8333333373069763,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.3187805479597524e-09,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.3187805479597524e-09,
|
|
"signal/volume_coverage_20/centered_abs_mean": 2.027334065068942e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 2.671985650515296e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.7722222328186035,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.0273341443111104e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 2.0273341443111104e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 3.823409808756395e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 5.032821552841682e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.7694444537162781,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.823410063552579e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 3.823410063552579e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 8.776430915347078e-10,
|
|
"signal/volume_coverage_5/group_std_mean": 1.13681020152967e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9944444417953491,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 8.776430762691411e-11,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 8.776430762691411e-11,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19922293559866006,
|
|
"calibration/batch_distribution_entropy": 0.9824476168092499,
|
|
"calibration/buffer_distribution_entropy": 0.8959630638539714,
|
|
"calibration/confidence_entropy": 0.5189281704536632,
|
|
"calibration/coverage@0%": 0.025054624089309953,
|
|
"calibration/coverage@1%": 0.025054624089309953,
|
|
"calibration/coverage@10%": 0.256557141472063,
|
|
"calibration/coverage@15%": 0.38917092578741797,
|
|
"calibration/coverage@20%": 0.5117063492063492,
|
|
"calibration/coverage@25%": 0.6346587161242334,
|
|
"calibration/coverage@30%": 0.8457187767532595,
|
|
"calibration/coverage@5%": 0.09872118168093298,
|
|
"calibration/ece": 0.23940879525551675,
|
|
"calibration/mean_confidence": 0.5118995725139233,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010329861111111093,
|
|
"completions/max_length": 3339.2,
|
|
"completions/max_terminated_length": 3339.2,
|
|
"completions/mean_length": 700.8325561523437,
|
|
"completions/mean_terminated_length": 708.1759155273437,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 197.0,
|
|
"epoch": 0.2159973000337496,
|
|
"grad_norm": 0.00047273669042624533,
|
|
"learning_rate": 3.5542168674698798e-06,
|
|
"loss": -0.0082,
|
|
"num_tokens": 188742809.0,
|
|
"reward": 0.9794747948646545,
|
|
"reward_std": 0.14528339505195617,
|
|
"rewards/accuracy_reward": 0.6794270873069763,
|
|
"rewards/brier_reward": 0.7353934526443482,
|
|
"rewards/confidence_uniqueness_reward": 0.9436571598052979,
|
|
"rewards/format_reward": 0.9894965171813965,
|
|
"rewards/frontier_aurc_reward": -0.0016743445303291082,
|
|
"rewards/frontier_ece_reward": 0.0010622843401506543,
|
|
"rewards/frontier_entropy_batch_reward": -0.2297738403081894,
|
|
"rewards/volume_coverage_0": -3.8754242245864213e-10,
|
|
"rewards/volume_coverage_1": -3.8754242245864213e-10,
|
|
"rewards/volume_coverage_10": -5.763823057725049e-10,
|
|
"rewards/volume_coverage_15": -6.379984013316075e-10,
|
|
"rewards/volume_coverage_20": -3.930808684105002e-09,
|
|
"rewards/volume_coverage_25": -1.0270524869460118e-08,
|
|
"rewards/volume_coverage_5": -3.8754242245864213e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17314995527267457,
|
|
"signal/accuracy_reward/group_std_mean": 0.22633666098117827,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3527777850627899,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08657497763633729,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08657497763633729,
|
|
"signal/advantage_abs_mean": 0.10956384688615799,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10956384688615799,
|
|
"signal/advantage_pre_scale_std": 0.1703871190547943,
|
|
"signal/advantage_std": 0.1703871190547943,
|
|
"signal/brier_reward/centered_abs_mean": 0.19396249949932098,
|
|
"signal/brier_reward/group_std_mean": 0.24135461449623108,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019396250322461128,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019396250322461128,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028895640373229982,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04839541018009186,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028895641677081584,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028895641677081584,
|
|
"signal/format_reward/centered_abs_mean": 0.01848415769636631,
|
|
"signal/format_reward/group_std_mean": 0.035885289683938025,
|
|
"signal/format_reward/group_zero_std_frac": 0.85,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009242078848183155,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009242078848183155,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016561457188799978,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002781048696488142,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0701821995317005e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0701821995317005e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06060823351144791,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08322409242391586,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0060608237981796265,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0060608237981796265,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3175591230392456,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39123265743255614,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03175591304898262,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03175591304898262,
|
|
"signal/volume_coverage_0/centered_abs_mean": 1.2535025073479033e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 1.6031172722641428e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9583333373069763,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.2535024684900974e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.2535024684900974e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 1.2535025073479033e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 1.6031172722641428e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9583333373069763,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.2535024684900974e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.2535024684900974e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 2.88981866769511e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 3.737770304379495e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.9166666626930237,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.8898186482662067e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.8898186482662067e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 3.634362555349213e-09,
|
|
"signal/volume_coverage_15/group_std_mean": 4.710481982250769e-09,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.8611111164093017,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.6343624776336016e-10,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 3.6343624776336016e-10,
|
|
"signal/volume_coverage_20/centered_abs_mean": 1.5966736732764274e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 2.0725850369274212e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.7833333253860474,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.5966736904848843e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.5966736904848843e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 3.288729004680135e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 4.266394340035617e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.7222222208976745,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.288729222283848e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 3.288729222283848e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 1.2535025073479033e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 1.6031172722641428e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9583333373069763,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.2535024684900974e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.2535024684900974e-10,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2507009224960528,
|
|
"calibration/batch_distribution_entropy": 0.9775486373906073,
|
|
"calibration/buffer_distribution_entropy": 0.9065992578768183,
|
|
"calibration/confidence_entropy": 0.5001866055085517,
|
|
"calibration/coverage@0%": 0.03300766861293068,
|
|
"calibration/coverage@1%": 0.03300766861293068,
|
|
"calibration/coverage@10%": 0.2548171806438221,
|
|
"calibration/coverage@15%": 0.40902696201277405,
|
|
"calibration/coverage@20%": 0.5034949578671087,
|
|
"calibration/coverage@25%": 0.538191739190496,
|
|
"calibration/coverage@30%": 0.6138651747478934,
|
|
"calibration/coverage@5%": 0.0885765078483404,
|
|
"calibration/ece": 0.2086511975868341,
|
|
"calibration/mean_confidence": 0.5522644108583503,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.007204861111111116,
|
|
"completions/max_length": 3247.8,
|
|
"completions/max_terminated_length": 3247.8,
|
|
"completions/mean_length": 693.4934814453125,
|
|
"completions/mean_terminated_length": 698.5267456054687,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 173.0,
|
|
"epoch": 0.22799715003562457,
|
|
"grad_norm": 0.0003818366676568985,
|
|
"learning_rate": 3.4036144578313257e-06,
|
|
"loss": -0.0053,
|
|
"num_tokens": 199823534.0,
|
|
"reward": 0.9745383620262146,
|
|
"reward_std": 0.13535202145576478,
|
|
"rewards/accuracy_reward": 0.6660590291023254,
|
|
"rewards/brier_reward": 0.7394654631614686,
|
|
"rewards/confidence_uniqueness_reward": 0.9451273560523987,
|
|
"rewards/format_reward": 0.9927951335906983,
|
|
"rewards/frontier_aurc_reward": -0.001798482658341527,
|
|
"rewards/frontier_ece_reward": 0.0039430757868103685,
|
|
"rewards/frontier_entropy_batch_reward": -0.237198144197464,
|
|
"rewards/volume_coverage_0": -1.48676847916418e-10,
|
|
"rewards/volume_coverage_1": -1.48676847916418e-10,
|
|
"rewards/volume_coverage_10": 1.4221258060054254e-10,
|
|
"rewards/volume_coverage_15": -9.984373341986873e-10,
|
|
"rewards/volume_coverage_20": -1.1343260775120712e-09,
|
|
"rewards/volume_coverage_25": -1.9268936113370216e-09,
|
|
"rewards/volume_coverage_5": -1.48676847916418e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15666775405406952,
|
|
"signal/accuracy_reward/group_std_mean": 0.20973491668701172,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.397222226858139,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07833387702703476,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07833387702703476,
|
|
"signal/advantage_abs_mean": 0.10032767802476883,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10032767802476883,
|
|
"signal/advantage_pre_scale_std": 0.15808959305286407,
|
|
"signal/advantage_std": 0.15808959305286407,
|
|
"signal/brier_reward/centered_abs_mean": 0.18711373209953308,
|
|
"signal/brier_reward/group_std_mean": 0.2359489381313324,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01871137283742428,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01871137283742428,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02431831918656826,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04133199006319046,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024318320211023092,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024318320211023092,
|
|
"signal/format_reward/centered_abs_mean": 0.01224500872194767,
|
|
"signal/format_reward/group_std_mean": 0.026611294224858285,
|
|
"signal/format_reward/group_zero_std_frac": 0.875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006122504360973835,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006122504360973835,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018433568766340613,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003007926885038614,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3041959866532125e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3041959866532125e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05773395150899887,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07975224107503891,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005773395299911499,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005773395299911499,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3170252025127411,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3918557822704315,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031702518835663794,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031702518835663794,
|
|
"signal/volume_coverage_0/centered_abs_mean": 8.475213697511918e-10,
|
|
"signal/volume_coverage_0/group_std_mean": 1.120057463444013e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9666666626930237,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.475213968128781e-11,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 8.475213968128781e-11,
|
|
"signal/volume_coverage_1/centered_abs_mean": 8.475213697511918e-10,
|
|
"signal/volume_coverage_1/group_std_mean": 1.120057463444013e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9666666626930237,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.475213968128781e-11,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 8.475213968128781e-11,
|
|
"signal/volume_coverage_10/centered_abs_mean": 2.524260600544892e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 3.2873440358738293e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.9333333253860474,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.524260685893287e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.524260685893287e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 5.845151940242488e-09,
|
|
"signal/volume_coverage_15/group_std_mean": 7.716072530916662e-09,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.7861111164093018,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.845151931915815e-10,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 5.845151931915815e-10,
|
|
"signal/volume_coverage_20/centered_abs_mean": 7.86484022352596e-09,
|
|
"signal/volume_coverage_20/group_std_mean": 1.0402011918486664e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.7722222328186035,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 7.864840639859594e-10,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 7.864840639859594e-10,
|
|
"signal/volume_coverage_25/centered_abs_mean": 1.438830108391187e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 1.8884527674956075e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.661111107468605,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.4388301466938812e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.4388301466938812e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 8.475213697511918e-10,
|
|
"signal/volume_coverage_5/group_std_mean": 1.120057463444013e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9666666626930237,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 8.475213968128781e-11,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 8.475213968128781e-11,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18728217917272516,
|
|
"calibration/batch_distribution_entropy": 0.9694950506314033,
|
|
"calibration/buffer_distribution_entropy": 0.9121722739252384,
|
|
"calibration/confidence_entropy": 0.5021543179603883,
|
|
"calibration/coverage@0%": 0.017372255964316463,
|
|
"calibration/coverage@1%": 0.017372255964316463,
|
|
"calibration/coverage@10%": 0.11814513525377532,
|
|
"calibration/coverage@15%": 0.4494851837396229,
|
|
"calibration/coverage@20%": 0.6551994681070942,
|
|
"calibration/coverage@25%": 0.8486937388591802,
|
|
"calibration/coverage@30%": 0.9043421345811051,
|
|
"calibration/coverage@5%": 0.028346665413135363,
|
|
"calibration/ece": 0.17114394458500604,
|
|
"calibration/mean_confidence": 0.5716688069748909,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011458333333333348,
|
|
"completions/max_length": 3231.4,
|
|
"completions/max_terminated_length": 3231.4,
|
|
"completions/mean_length": 706.346533203125,
|
|
"completions/mean_terminated_length": 714.49365234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 212.2,
|
|
"epoch": 0.23999700003749952,
|
|
"grad_norm": 0.0004011181299574673,
|
|
"learning_rate": 3.2530120481927713e-06,
|
|
"loss": -0.0082,
|
|
"num_tokens": 211059718.0,
|
|
"reward": 0.9819074869155884,
|
|
"reward_std": 0.13814267814159392,
|
|
"rewards/accuracy_reward": 0.6813368082046509,
|
|
"rewards/brier_reward": 0.7655829429626465,
|
|
"rewards/confidence_uniqueness_reward": 0.940147602558136,
|
|
"rewards/format_reward": 0.9884548544883728,
|
|
"rewards/frontier_aurc_reward": -0.0016520792851224542,
|
|
"rewards/frontier_ece_reward": 0.010339464247226714,
|
|
"rewards/frontier_entropy_batch_reward": -0.24574714303016662,
|
|
"rewards/volume_coverage_0": 7.541852403009086e-11,
|
|
"rewards/volume_coverage_1": 7.541852403009086e-11,
|
|
"rewards/volume_coverage_10": -2.984458390489775e-10,
|
|
"rewards/volume_coverage_15": -3.88511360756294e-10,
|
|
"rewards/volume_coverage_20": -6.244302493477183e-10,
|
|
"rewards/volume_coverage_25": -5.88479067697989e-09,
|
|
"rewards/volume_coverage_5": 7.541852403009086e-11,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16082356572151185,
|
|
"signal/accuracy_reward/group_std_mean": 0.21090216040611268,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08041178286075593,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08041178286075593,
|
|
"signal/advantage_abs_mean": 0.10463630557060241,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10463630557060241,
|
|
"signal/advantage_pre_scale_std": 0.16416526734828948,
|
|
"signal/advantage_std": 0.16416526734828948,
|
|
"signal/brier_reward/centered_abs_mean": 0.17226437330245972,
|
|
"signal/brier_reward/group_std_mean": 0.21906766891479493,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017226437106728554,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017226437106728554,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029767391458153724,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.049494147300720215,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029767390806227922,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029767390806227922,
|
|
"signal/format_reward/centered_abs_mean": 0.018419053964316846,
|
|
"signal/format_reward/group_std_mean": 0.035861417278647424,
|
|
"signal/format_reward/group_zero_std_frac": 0.8444444417953492,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009209526982158423,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009209526982158423,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018402117071673273,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002930039027705789,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.300264750374481e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.300264750374481e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0525245763361454,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0730134516954422,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005252457968890667,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005252457968890667,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32134751677513124,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.393948096036911,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032134751230478285,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032134751230478285,
|
|
"signal/volume_coverage_0/centered_abs_mean": 8.663491951610425e-10,
|
|
"signal/volume_coverage_0/group_std_mean": 1.1307943567628876e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9722222208976745,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.663492540549045e-11,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 8.663492540549045e-11,
|
|
"signal/volume_coverage_1/centered_abs_mean": 8.663491951610425e-10,
|
|
"signal/volume_coverage_1/group_std_mean": 1.1307943567628876e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9722222208976745,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.663492540549045e-11,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 8.663492540549045e-11,
|
|
"signal/volume_coverage_10/centered_abs_mean": 4.449955628982316e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 5.785257982393599e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.8777777791023255,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.4499558877163226e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 4.4499558877163226e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 6.421526446698245e-09,
|
|
"signal/volume_coverage_15/group_std_mean": 8.361979440818512e-09,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.8277777791023254,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.421526364038671e-10,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 6.421526364038671e-10,
|
|
"signal/volume_coverage_20/centered_abs_mean": 8.840661205877787e-09,
|
|
"signal/volume_coverage_20/group_std_mean": 1.1587044899952215e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.7861111104488373,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 8.840661395222854e-10,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 8.840661395222854e-10,
|
|
"signal/volume_coverage_25/centered_abs_mean": 3.609103034207806e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 4.8652312312214006e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.622222226858139,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.6091031579976727e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 3.6091031579976727e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 8.663491951610425e-10,
|
|
"signal/volume_coverage_5/group_std_mean": 1.1307943567628876e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9722222208976745,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 8.663492540549045e-11,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 8.663492540549045e-11,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.23999700003749952,
|
|
"eval_calibration/aurc": 0.21511125000674836,
|
|
"eval_calibration/batch_distribution_entropy": 0.91558582212121,
|
|
"eval_calibration/buffer_distribution_entropy": 0.916314646286911,
|
|
"eval_calibration/confidence_entropy": 0.5371354515997979,
|
|
"eval_calibration/coverage@0%": 0.11544578853046594,
|
|
"eval_calibration/coverage@1%": 0.11544578853046594,
|
|
"eval_calibration/coverage@10%": 0.2196124551971326,
|
|
"eval_calibration/coverage@15%": 0.3522513440860215,
|
|
"eval_calibration/coverage@20%": 0.5835013440860215,
|
|
"eval_calibration/coverage@25%": 0.7750336021505376,
|
|
"eval_calibration/coverage@30%": 0.9894153225806451,
|
|
"eval_calibration/coverage@5%": 0.11544578853046594,
|
|
"eval_calibration/ece": 0.2672830977154876,
|
|
"eval_calibration/mean_confidence": 0.5755224611157268,
|
|
"eval_completions/clipped_ratio": 0.009548611111111105,
|
|
"eval_completions/max_length": 2202.6666666666665,
|
|
"eval_completions/max_terminated_length": 2202.6666666666665,
|
|
"eval_completions/mean_length": 694.9336954752604,
|
|
"eval_completions/mean_terminated_length": 701.6080830891927,
|
|
"eval_completions/min_length": 51.5,
|
|
"eval_completions/min_terminated_length": 259.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 211059718.0,
|
|
"eval_reward": 0.8966165979703268,
|
|
"eval_reward_std": 0.24938206871350607,
|
|
"eval_rewards/accuracy_reward": 0.671006957689921,
|
|
"eval_rewards/brier_reward": 0.7574487229188284,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.885144849618276,
|
|
"eval_rewards/format_reward": 0.9904513855775198,
|
|
"eval_rewards/frontier_aurc_reward": -0.001605092897079885,
|
|
"eval_rewards/frontier_ece_reward": 0.006932677895141144,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9904513855775198,
|
|
"eval_rewards/volume_coverage_0": -5.618685963541814e-11,
|
|
"eval_rewards/volume_coverage_1": -5.618685963541814e-11,
|
|
"eval_rewards/volume_coverage_10": -4.00475674685342e-10,
|
|
"eval_rewards/volume_coverage_15": -1.2276121017443474e-09,
|
|
"eval_rewards/volume_coverage_20": -1.3579284266774145e-09,
|
|
"eval_rewards/volume_coverage_25": -2.1892862060261345e-09,
|
|
"eval_rewards/volume_coverage_5": -5.618685963541814e-11,
|
|
"eval_runtime": 182.1784,
|
|
"eval_samples_per_second": 5.489,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4327799429496129,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.47195852796236676,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21638997147480646,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21638997147480646,
|
|
"eval_signal/advantage_abs_mean": 0.2223157857855161,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2223157857855161,
|
|
"eval_signal/advantage_pre_scale_std": 0.24726740519205728,
|
|
"eval_signal/advantage_std": 0.24726740519205728,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2004946768283844,
|
|
"eval_signal/brier_reward/group_std_mean": 0.25172630945841473,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02004946768283844,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02004946768283844,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.053160481775800385,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08579375346501668,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0053160480844477815,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0053160480844477815,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.018391926928112905,
|
|
"eval_signal/format_reward/group_std_mean": 0.051025692683955036,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.722222238779068,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009195963464056453,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.009195963464056453,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0020544196401412287,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0037250108628844223,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5680247138855822e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5680247138855822e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.05547807924449444,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.07796540856361389,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005547808172802131,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005547808172802131,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.018391926928112905,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.051025692683955036,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.722222238779068,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0018391927005723119,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0018391927005723119,
|
|
"eval_signal/volume_coverage_0/centered_abs_mean": 9.294568471355605e-10,
|
|
"eval_signal/volume_coverage_0/group_std_mean": 1.2967965983453251e-09,
|
|
"eval_signal/volume_coverage_0/group_zero_std_frac": 0.944444457689921,
|
|
"eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 9.294568789388243e-11,
|
|
"eval_signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_0/weighted_centered_abs_mean": 9.294568789388243e-11,
|
|
"eval_signal/volume_coverage_1/centered_abs_mean": 9.294568471355605e-10,
|
|
"eval_signal/volume_coverage_1/group_std_mean": 1.2967965983453251e-09,
|
|
"eval_signal/volume_coverage_1/group_zero_std_frac": 0.944444457689921,
|
|
"eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 9.294568789388243e-11,
|
|
"eval_signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_1/weighted_centered_abs_mean": 9.294568789388243e-11,
|
|
"eval_signal/volume_coverage_10/centered_abs_mean": 1.7053908031120872e-09,
|
|
"eval_signal/volume_coverage_10/group_std_mean": 2.3690177065420017e-09,
|
|
"eval_signal/volume_coverage_10/group_zero_std_frac": 0.9166666865348816,
|
|
"eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.7053908372283155e-10,
|
|
"eval_signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_10/weighted_centered_abs_mean": 1.7053908372283155e-10,
|
|
"eval_signal/volume_coverage_15/centered_abs_mean": 4.71717971397186e-09,
|
|
"eval_signal/volume_coverage_15/group_std_mean": 6.529600547755532e-09,
|
|
"eval_signal/volume_coverage_15/group_zero_std_frac": 0.8333333532015482,
|
|
"eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 4.717179271617374e-10,
|
|
"eval_signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_15/weighted_centered_abs_mean": 4.717179271617374e-10,
|
|
"eval_signal/volume_coverage_20/centered_abs_mean": 1.316934451304578e-08,
|
|
"eval_signal/volume_coverage_20/group_std_mean": 1.807888804402265e-08,
|
|
"eval_signal/volume_coverage_20/group_zero_std_frac": 0.750000019868215,
|
|
"eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.3169344266715048e-09,
|
|
"eval_signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_20/weighted_centered_abs_mean": 1.3169344266715048e-09,
|
|
"eval_signal/volume_coverage_25/centered_abs_mean": 3.255522570041099e-08,
|
|
"eval_signal/volume_coverage_25/group_std_mean": 4.442350100738087e-08,
|
|
"eval_signal/volume_coverage_25/group_zero_std_frac": 0.6666666766007742,
|
|
"eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.255522502039939e-09,
|
|
"eval_signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_25/weighted_centered_abs_mean": 3.255522502039939e-09,
|
|
"eval_signal/volume_coverage_5/centered_abs_mean": 9.294568471355605e-10,
|
|
"eval_signal/volume_coverage_5/group_std_mean": 1.2967965983453251e-09,
|
|
"eval_signal/volume_coverage_5/group_zero_std_frac": 0.944444457689921,
|
|
"eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 9.294568789388243e-11,
|
|
"eval_signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_5/weighted_centered_abs_mean": 9.294568789388243e-11,
|
|
"eval_steps_per_second": 0.033,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31272345586761635,
|
|
"calibration/batch_distribution_entropy": 0.9726450472289901,
|
|
"calibration/buffer_distribution_entropy": 0.9188371377258736,
|
|
"calibration/confidence_entropy": 0.5335367788600449,
|
|
"calibration/coverage@0%": 0.02269584371857099,
|
|
"calibration/coverage@1%": 0.02269584371857099,
|
|
"calibration/coverage@10%": 0.1300949839302112,
|
|
"calibration/coverage@15%": 0.19966343631116357,
|
|
"calibration/coverage@20%": 0.2285751068867524,
|
|
"calibration/coverage@25%": 0.29510949842114387,
|
|
"calibration/coverage@30%": 0.4522562592221083,
|
|
"calibration/coverage@5%": 0.08565880668153394,
|
|
"calibration/ece": 0.15692051533719759,
|
|
"calibration/mean_confidence": 0.5591746107834116,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012934027777777768,
|
|
"completions/max_length": 3715.6,
|
|
"completions/max_terminated_length": 3715.6,
|
|
"completions/mean_length": 702.8796997070312,
|
|
"completions/mean_terminated_length": 712.0661743164062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 191.0,
|
|
"epoch": 0.2519968500393745,
|
|
"grad_norm": 0.00048026847071014345,
|
|
"learning_rate": 3.1024096385542172e-06,
|
|
"loss": -0.011,
|
|
"num_tokens": 222233756.0,
|
|
"reward": 0.9744926452636719,
|
|
"reward_std": 0.1430598109960556,
|
|
"rewards/accuracy_reward": 0.6703125,
|
|
"rewards/brier_reward": 0.7576799392700195,
|
|
"rewards/confidence_uniqueness_reward": 0.939336609840393,
|
|
"rewards/format_reward": 0.9868923544883728,
|
|
"rewards/frontier_aurc_reward": -0.0014964503003284334,
|
|
"rewards/frontier_ece_reward": 0.006954653561115265,
|
|
"rewards/frontier_entropy_batch_reward": -0.24488165378570556,
|
|
"rewards/volume_coverage_0": -2.528729010908837e-10,
|
|
"rewards/volume_coverage_1": -2.528729010908837e-10,
|
|
"rewards/volume_coverage_10": -8.753370007996697e-10,
|
|
"rewards/volume_coverage_15": -1.2441597280199445e-09,
|
|
"rewards/volume_coverage_20": -1.8202452739535246e-09,
|
|
"rewards/volume_coverage_25": -4.066257730528378e-09,
|
|
"rewards/volume_coverage_5": -2.528729010908837e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1658420145511627,
|
|
"signal/accuracy_reward/group_std_mean": 0.21986316740512848,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.37777777910232546,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08292100727558135,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08292100727558135,
|
|
"signal/advantage_abs_mean": 0.10654713213443756,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10654713213443756,
|
|
"signal/advantage_pre_scale_std": 0.16769869327545167,
|
|
"signal/advantage_std": 0.16769869327545167,
|
|
"signal/brier_reward/centered_abs_mean": 0.16641111969947814,
|
|
"signal/brier_reward/group_std_mean": 0.21107023060321808,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016641111671924592,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016641111671924592,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03133438862860203,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05064094811677933,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003133438853546977,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003133438853546977,
|
|
"signal/format_reward/centered_abs_mean": 0.02077365480363369,
|
|
"signal/format_reward/group_std_mean": 0.038099339604377745,
|
|
"signal/format_reward/group_zero_std_frac": 0.8444444417953492,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010386827401816845,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010386827401816845,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001442649750970304,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023628756869584324,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.803312261472456e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.803312261472456e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04892703369259834,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06745585799217224,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0048927033320069315,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0048927033320069315,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.319078129529953,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39101446866989137,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03190781399607658,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03190781399607658,
|
|
"signal/volume_coverage_0/centered_abs_mean": 1.384534620285649e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 1.8525299072535972e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9361111164093018,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.3845346188978702e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.3845346188978702e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 1.384534620285649e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 1.8525299072535972e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9361111164093018,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.3845346188978702e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.3845346188978702e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 2.9901749964977853e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 4.043323897917617e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.8861111164093017,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.9901749062921644e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.9901749062921644e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 7.97301115129656e-09,
|
|
"signal/volume_coverage_15/group_std_mean": 1.0545502338032975e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.7861111044883728,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 7.973011320605572e-10,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 7.973011320605572e-10,
|
|
"signal/volume_coverage_20/centered_abs_mean": 1.6036694550081164e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 2.1090238888632484e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.7138888955116272,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.6036695527077426e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.6036695527077426e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 6.605738356313395e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 8.634911559113334e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.5916666924953461,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 6.60573808985987e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 6.60573808985987e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 1.384534620285649e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 1.8525299072535972e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9361111164093018,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.3845346188978702e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.3845346188978702e-10,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20659424723386605,
|
|
"calibration/batch_distribution_entropy": 0.9763259743928365,
|
|
"calibration/buffer_distribution_entropy": 0.9241441777845083,
|
|
"calibration/confidence_entropy": 0.5103205997163243,
|
|
"calibration/coverage@0%": 0.035508158669289906,
|
|
"calibration/coverage@1%": 0.035508158669289906,
|
|
"calibration/coverage@10%": 0.21171301071454668,
|
|
"calibration/coverage@15%": 0.2847814132315914,
|
|
"calibration/coverage@20%": 0.49886641517706865,
|
|
"calibration/coverage@25%": 0.6624328588503937,
|
|
"calibration/coverage@30%": 0.8575227068636678,
|
|
"calibration/coverage@5%": 0.09811422916537346,
|
|
"calibration/ece": 0.17949915830122162,
|
|
"calibration/mean_confidence": 0.5596010988492515,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.008593749999999978,
|
|
"completions/max_length": 3468.4,
|
|
"completions/max_terminated_length": 3468.4,
|
|
"completions/mean_length": 720.1177124023437,
|
|
"completions/mean_terminated_length": 726.3704467773438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 211.6,
|
|
"epoch": 0.2639967000412495,
|
|
"grad_norm": 0.0004107690474484116,
|
|
"learning_rate": 2.9518072289156627e-06,
|
|
"loss": -0.0061,
|
|
"num_tokens": 233637960.0,
|
|
"reward": 0.9943800806999207,
|
|
"reward_std": 0.13367998898029326,
|
|
"rewards/accuracy_reward": 0.7108507037162781,
|
|
"rewards/brier_reward": 0.7597374558448792,
|
|
"rewards/confidence_uniqueness_reward": 0.9419220328330994,
|
|
"rewards/format_reward": 0.9913194417953491,
|
|
"rewards/frontier_aurc_reward": -0.0014706589048728348,
|
|
"rewards/frontier_ece_reward": 0.0020732904551550744,
|
|
"rewards/frontier_entropy_batch_reward": -0.27059880197048186,
|
|
"rewards/volume_coverage_0": -2.5791634140903683e-10,
|
|
"rewards/volume_coverage_1": -2.5791634140903683e-10,
|
|
"rewards/volume_coverage_10": -4.4300731728841304e-10,
|
|
"rewards/volume_coverage_15": -7.582023817814809e-10,
|
|
"rewards/volume_coverage_20": -1.0568040151315295e-09,
|
|
"rewards/volume_coverage_25": -4.2383120346656256e-09,
|
|
"rewards/volume_coverage_5": -2.5791634140903683e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15238172709941863,
|
|
"signal/accuracy_reward/group_std_mean": 0.20750262439250947,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3777777850627899,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07619086354970932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07619086354970932,
|
|
"signal/advantage_abs_mean": 0.09959981143474579,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09959981143474579,
|
|
"signal/advantage_pre_scale_std": 0.15816081762313844,
|
|
"signal/advantage_std": 0.15816081762313844,
|
|
"signal/brier_reward/centered_abs_mean": 0.1581783950328827,
|
|
"signal/brier_reward/group_std_mean": 0.20072446763515472,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015817839279770853,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015817839279770853,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026568013057112694,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04261137619614601,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002656801464036107,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002656801464036107,
|
|
"signal/format_reward/centered_abs_mean": 0.01472439244389534,
|
|
"signal/format_reward/group_std_mean": 0.02825543247163296,
|
|
"signal/format_reward/group_zero_std_frac": 0.8805555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00736219622194767,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00736219622194767,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014523042133077978,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002319393353536725,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.815380292100599e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.815380292100599e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.045850321650505066,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06342493891716003,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0045850323513150215,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0045850323513150215,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33311462998390196,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4045804440975189,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03331146351993084,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03331146351993084,
|
|
"signal/volume_coverage_0/centered_abs_mean": 4.972203385134044e-10,
|
|
"signal/volume_coverage_0/group_std_mean": 6.490914430945516e-10,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9861111164093017,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.9722035603411153e-11,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 4.9722035603411153e-11,
|
|
"signal/volume_coverage_1/centered_abs_mean": 4.972203385134044e-10,
|
|
"signal/volume_coverage_1/group_std_mean": 6.490914430945516e-10,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9861111164093017,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.9722035603411153e-11,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 4.9722035603411153e-11,
|
|
"signal/volume_coverage_10/centered_abs_mean": 7.563527429366168e-10,
|
|
"signal/volume_coverage_10/group_std_mean": 9.919285573478653e-10,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.9722222208976745,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.563527937640146e-11,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 7.563527937640146e-11,
|
|
"signal/volume_coverage_15/centered_abs_mean": 1.5424102620120728e-09,
|
|
"signal/volume_coverage_15/group_std_mean": 2.014626616919646e-09,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.9277777791023254,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.5424102906350102e-10,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.5424102906350102e-10,
|
|
"signal/volume_coverage_20/centered_abs_mean": 3.1644094095062413e-09,
|
|
"signal/volume_coverage_20/group_std_mean": 4.138002096532034e-09,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.8527777910232544,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.1644093812302487e-10,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 3.1644093812302487e-10,
|
|
"signal/volume_coverage_25/centered_abs_mean": 1.765938459907801e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 2.32815502876349e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.75,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.765938506537168e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.765938506537168e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 4.972203385134044e-10,
|
|
"signal/volume_coverage_5/group_std_mean": 6.490914430945516e-10,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9861111164093017,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.9722035603411153e-11,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 4.9722035603411153e-11,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.32860315877388296,
|
|
"calibration/batch_distribution_entropy": 0.977465822883542,
|
|
"calibration/buffer_distribution_entropy": 0.929192606743824,
|
|
"calibration/confidence_entropy": 0.5185033600393403,
|
|
"calibration/coverage@0%": 0.008987521330787587,
|
|
"calibration/coverage@1%": 0.008987521330787587,
|
|
"calibration/coverage@10%": 0.07148127660801809,
|
|
"calibration/coverage@15%": 0.12962492282248475,
|
|
"calibration/coverage@20%": 0.25861813025028974,
|
|
"calibration/coverage@25%": 0.47630181669848853,
|
|
"calibration/coverage@30%": 0.5781705600900647,
|
|
"calibration/coverage@5%": 0.0321454160676297,
|
|
"calibration/ece": 0.19758046540227037,
|
|
"calibration/mean_confidence": 0.5206799510790493,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014236111111111093,
|
|
"completions/max_length": 3348.8,
|
|
"completions/max_terminated_length": 3348.8,
|
|
"completions/mean_length": 720.4413208007812,
|
|
"completions/mean_terminated_length": 730.9393432617187,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 194.8,
|
|
"epoch": 0.27599655004312446,
|
|
"grad_norm": 0.0003260687808506191,
|
|
"learning_rate": 2.8012048192771087e-06,
|
|
"loss": -0.0109,
|
|
"num_tokens": 245016644.0,
|
|
"reward": 0.9704742431640625,
|
|
"reward_std": 0.1369811251759529,
|
|
"rewards/accuracy_reward": 0.6642361164093018,
|
|
"rewards/brier_reward": 0.7601930141448975,
|
|
"rewards/confidence_uniqueness_reward": 0.9377529859542847,
|
|
"rewards/format_reward": 0.9857638835906982,
|
|
"rewards/frontier_aurc_reward": -0.0015619280282407999,
|
|
"rewards/frontier_ece_reward": 0.007641966454684734,
|
|
"rewards/frontier_entropy_batch_reward": -0.2506504714488983,
|
|
"rewards/volume_coverage_0": 1.3546747751336242e-11,
|
|
"rewards/volume_coverage_1": 1.3546747751336242e-11,
|
|
"rewards/volume_coverage_10": -3.578200222231764e-10,
|
|
"rewards/volume_coverage_15": -9.904014358630419e-10,
|
|
"rewards/volume_coverage_20": -2.8937101603787595e-10,
|
|
"rewards/volume_coverage_25": 8.38249629986354e-09,
|
|
"rewards/volume_coverage_5": 1.3546747751336242e-11,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15470920205116273,
|
|
"signal/accuracy_reward/group_std_mean": 0.20263281166553498,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.425,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07735460102558137,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07735460102558137,
|
|
"signal/advantage_abs_mean": 0.10298931151628495,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10298931151628495,
|
|
"signal/advantage_pre_scale_std": 0.16549083590507507,
|
|
"signal/advantage_std": 0.16549083590507507,
|
|
"signal/brier_reward/centered_abs_mean": 0.15902409851551055,
|
|
"signal/brier_reward/group_std_mean": 0.20213948488235473,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015902410633862017,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015902410633862017,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03237666189670563,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05298796966671944,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003237666329368949,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003237666329368949,
|
|
"signal/format_reward/centered_abs_mean": 0.02151692695915699,
|
|
"signal/format_reward/group_std_mean": 0.04006081186234951,
|
|
"signal/format_reward/group_zero_std_frac": 0.8361111283302307,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010758463479578494,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010758463479578494,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014225503196939825,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022425684612244366,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.77818792508333e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.77818792508333e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04533173516392708,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06232137307524681,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004533173609524965,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004533173609524965,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3208661198616028,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39007292985916137,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032086612284183504,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032086612284183504,
|
|
"signal/volume_coverage_0/centered_abs_mean": 1.1528306809172051e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 1.4859115216037111e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9666666626930237,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.1528306122221555e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.1528306122221555e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 1.1528306809172051e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 1.4859115216037111e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9666666626930237,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.1528306122221555e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.1528306122221555e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 2.885503325167349e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 3.783502866561151e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.9,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.885503572191972e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.885503572191972e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 5.144241055354826e-09,
|
|
"signal/volume_coverage_15/group_std_mean": 6.739672198108337e-09,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.8416666626930237,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.144241116417092e-10,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 5.144241116417092e-10,
|
|
"signal/volume_coverage_20/centered_abs_mean": 1.0344034517828504e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 1.3228679662802278e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.7916666507720947,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 1.0344034573339656e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 1.0344034573339656e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 3.0012533969170366e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 3.899686404906788e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.6666666865348816,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.0012535123802307e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 3.0012535123802307e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 1.1528306809172051e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 1.4859115216037111e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9666666626930237,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.1528306122221555e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.1528306122221555e-10,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.33500062539729397,
|
|
"calibration/batch_distribution_entropy": 0.956171359928593,
|
|
"calibration/buffer_distribution_entropy": 0.9341393129252076,
|
|
"calibration/confidence_entropy": 0.5175410523940116,
|
|
"calibration/coverage@0%": 0.007331317850762675,
|
|
"calibration/coverage@1%": 0.007331317850762675,
|
|
"calibration/coverage@10%": 0.02520131922495534,
|
|
"calibration/coverage@15%": 0.23602583482204204,
|
|
"calibration/coverage@20%": 0.42010581283495946,
|
|
"calibration/coverage@25%": 0.5450955063899958,
|
|
"calibration/coverage@30%": 0.5738903394255874,
|
|
"calibration/coverage@5%": 0.007331317850762675,
|
|
"calibration/ece": 0.17458800836962912,
|
|
"calibration/mean_confidence": 0.5967470814550844,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009982638888888885,
|
|
"completions/max_length": 3088.2,
|
|
"completions/max_terminated_length": 3088.2,
|
|
"completions/mean_length": 718.2924560546875,
|
|
"completions/mean_terminated_length": 725.5123291015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 247.0,
|
|
"epoch": 0.28799640004499943,
|
|
"grad_norm": 0.0002902206324506551,
|
|
"learning_rate": 2.6506024096385547e-06,
|
|
"loss": -0.0084,
|
|
"num_tokens": 256373229.0,
|
|
"reward": 0.9828748226165771,
|
|
"reward_std": 0.13469320088624953,
|
|
"rewards/accuracy_reward": 0.686024296283722,
|
|
"rewards/brier_reward": 0.7709903001785279,
|
|
"rewards/confidence_uniqueness_reward": 0.940601646900177,
|
|
"rewards/format_reward": 0.9899305582046509,
|
|
"rewards/frontier_aurc_reward": -0.0016850390122272075,
|
|
"rewards/frontier_ece_reward": 0.0074638242833316324,
|
|
"rewards/frontier_entropy_batch_reward": -0.2698711782693863,
|
|
"rewards/volume_coverage_0": -1.165113189954825e-10,
|
|
"rewards/volume_coverage_1": -1.165113189954825e-10,
|
|
"rewards/volume_coverage_10": -3.5378330993929286e-10,
|
|
"rewards/volume_coverage_15": -3.960055356549974e-10,
|
|
"rewards/volume_coverage_20": -5.185866753382173e-10,
|
|
"rewards/volume_coverage_25": -1.3467162430363944e-09,
|
|
"rewards/volume_coverage_5": -4.011809402043598e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15410698801279069,
|
|
"signal/accuracy_reward/group_std_mean": 0.20190810561180114,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.425,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07705349400639534,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07705349400639534,
|
|
"signal/advantage_abs_mean": 0.10197492688894272,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10197492688894272,
|
|
"signal/advantage_pre_scale_std": 0.16204161643981935,
|
|
"signal/advantage_std": 0.16204161643981935,
|
|
"signal/brier_reward/centered_abs_mean": 0.1546614795923233,
|
|
"signal/brier_reward/group_std_mean": 0.19821046888828278,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015466148406267166,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015466148406267166,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02829754091799259,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04569016918540001,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028297540731728075,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028297540731728075,
|
|
"signal/format_reward/centered_abs_mean": 0.01647135429084301,
|
|
"signal/format_reward/group_std_mean": 0.03150532059371471,
|
|
"signal/format_reward/group_zero_std_frac": 0.8666666746139526,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008235677145421505,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008235677145421505,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016746392473578453,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002678707940503955,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0932990810251795e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0932990810251795e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0423102580010891,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05887846276164055,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004231025744229555,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004231025744229555,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33146599531173704,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4032399892807007,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03314659893512726,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03314659893512726,
|
|
"signal/volume_coverage_0/centered_abs_mean": 1.1543410685654897e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 1.4904493889078552e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9472222208976746,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.1543411222551813e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.1543411222551813e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 1.1543410685654897e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 1.4904493889078552e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9472222208976746,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.1543411222551813e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.1543411222551813e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 3.826916784854806e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 4.964569447207801e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.9361111044883728,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.8269166720110437e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.8269166720110437e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 4.632062246268109e-09,
|
|
"signal/volume_coverage_15/group_std_mean": 6.021744061723844e-09,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.9305555582046509,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 4.632062288855571e-10,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 4.632062288855571e-10,
|
|
"signal/volume_coverage_20/centered_abs_mean": 8.57383582464344e-09,
|
|
"signal/volume_coverage_20/group_std_mean": 1.1109397090452999e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.8388888835906982,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 8.573836485399611e-10,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 8.573836485399611e-10,
|
|
"signal/volume_coverage_25/centered_abs_mean": 2.939619259678672e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 3.8684455194032405e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.8027777791023254,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.9396191353510404e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 2.9396191353510404e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 2.215817353212746e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 2.8131893509142357e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9444444417953491,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.215817306982365e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.215817306982365e-10,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1915708186268675,
|
|
"calibration/batch_distribution_entropy": 0.9721579261742956,
|
|
"calibration/buffer_distribution_entropy": 0.9370939672508353,
|
|
"calibration/confidence_entropy": 0.5153446337077295,
|
|
"calibration/coverage@0%": 0.031854188571992154,
|
|
"calibration/coverage@1%": 0.031854188571992154,
|
|
"calibration/coverage@10%": 0.24795650273649508,
|
|
"calibration/coverage@15%": 0.3973872063858091,
|
|
"calibration/coverage@20%": 0.5275370559863992,
|
|
"calibration/coverage@25%": 0.7552314671774273,
|
|
"calibration/coverage@30%": 0.8777631923220005,
|
|
"calibration/coverage@5%": 0.16669963012195738,
|
|
"calibration/ece": 0.1816904653658499,
|
|
"calibration/mean_confidence": 0.5460239694308244,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.008854166666666673,
|
|
"completions/max_length": 3260.4,
|
|
"completions/max_terminated_length": 3260.4,
|
|
"completions/mean_length": 735.0815063476563,
|
|
"completions/mean_terminated_length": 741.7195556640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 224.6,
|
|
"epoch": 0.2999962500468744,
|
|
"grad_norm": 0.00040568591793999076,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": -0.0081,
|
|
"num_tokens": 267959032.0,
|
|
"reward": 0.9793904066085816,
|
|
"reward_std": 0.13277052342891693,
|
|
"rewards/accuracy_reward": 0.6761284828186035,
|
|
"rewards/brier_reward": 0.7679849863052368,
|
|
"rewards/confidence_uniqueness_reward": 0.9425747990608215,
|
|
"rewards/format_reward": 0.9911458253860473,
|
|
"rewards/frontier_aurc_reward": -0.0013498676475137473,
|
|
"rewards/frontier_ece_reward": 0.006347721349447966,
|
|
"rewards/frontier_entropy_batch_reward": -0.259206211566925,
|
|
"rewards/volume_coverage_0": -8.512854388423308e-11,
|
|
"rewards/volume_coverage_1": -8.512854388423308e-11,
|
|
"rewards/volume_coverage_10": -8.512854388423308e-11,
|
|
"rewards/volume_coverage_15": -2.287901209631349e-10,
|
|
"rewards/volume_coverage_20": -6.88414039662355e-10,
|
|
"rewards/volume_coverage_25": -7.95366701389355e-09,
|
|
"rewards/volume_coverage_5": -8.512854388423308e-11,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.155517578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.2059672147035599,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4138888895511627,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0777587890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0777587890625,
|
|
"signal/advantage_abs_mean": 0.10053557455539704,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10053557455539704,
|
|
"signal/advantage_pre_scale_std": 0.15793273150920867,
|
|
"signal/advantage_std": 0.15793273150920867,
|
|
"signal/brier_reward/centered_abs_mean": 0.15555652379989623,
|
|
"signal/brier_reward/group_std_mean": 0.1986626386642456,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015555652230978012,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015555652230978012,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026127389818429946,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04029006510972977,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002612739009782672,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002612739009782672,
|
|
"signal/format_reward/centered_abs_mean": 0.01452907994389534,
|
|
"signal/format_reward/group_std_mean": 0.026035796478390695,
|
|
"signal/format_reward/group_zero_std_frac": 0.8972222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00726453997194767,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00726453997194767,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013476456748321652,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002196387154981494,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.68455708262627e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.68455708262627e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.043793865293264386,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05965333953499794,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0043793866410851475,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0043793866410851475,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32110402584075926,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39466953873634336,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03211040273308754,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03211040273308754,
|
|
"signal/volume_coverage_0/centered_abs_mean": 2.513125332836985e-10,
|
|
"signal/volume_coverage_0/group_std_mean": 3.27953761369848e-10,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 1.0,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.5131253675314547e-11,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.5131253675314547e-11,
|
|
"signal/volume_coverage_1/centered_abs_mean": 2.513125332836985e-10,
|
|
"signal/volume_coverage_1/group_std_mean": 3.27953761369848e-10,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 1.0,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.5131253675314547e-11,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.5131253675314547e-11,
|
|
"signal/volume_coverage_10/centered_abs_mean": 2.513125332836985e-10,
|
|
"signal/volume_coverage_10/group_std_mean": 3.27953761369848e-10,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 1.0,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.5131253675314547e-11,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.5131253675314547e-11,
|
|
"signal/volume_coverage_15/centered_abs_mean": 6.385222087179621e-10,
|
|
"signal/volume_coverage_15/group_std_mean": 8.169314752448642e-10,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.9833333373069764,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.385222038607363e-11,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 6.385222038607363e-11,
|
|
"signal/volume_coverage_20/centered_abs_mean": 5.783205347942299e-09,
|
|
"signal/volume_coverage_20/group_std_mean": 7.703742019304904e-09,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.8611111044883728,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 5.783205683437819e-10,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 5.783205683437819e-10,
|
|
"signal/volume_coverage_25/centered_abs_mean": 3.0315699994787335e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 4.013403822611394e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.7388888835906983,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.0315701338157197e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 3.0315701338157197e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 2.513125332836985e-10,
|
|
"signal/volume_coverage_5/group_std_mean": 3.27953761369848e-10,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 1.0,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.5131253675314547e-11,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.5131253675314547e-11,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2339408439391594,
|
|
"calibration/batch_distribution_entropy": 0.961508234381926,
|
|
"calibration/buffer_distribution_entropy": 0.9402565004280987,
|
|
"calibration/confidence_entropy": 0.4876715529200112,
|
|
"calibration/coverage@0%": 0.026255400448416892,
|
|
"calibration/coverage@1%": 0.026255400448416892,
|
|
"calibration/coverage@10%": 0.17620168770352224,
|
|
"calibration/coverage@15%": 0.30992770755313526,
|
|
"calibration/coverage@20%": 0.4616251066347946,
|
|
"calibration/coverage@25%": 0.5897109719923753,
|
|
"calibration/coverage@30%": 0.6721060609134154,
|
|
"calibration/coverage@5%": 0.08652022772895099,
|
|
"calibration/ece": 0.11093919101297997,
|
|
"calibration/mean_confidence": 0.5869900882516286,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013802083333333348,
|
|
"completions/max_length": 3537.0,
|
|
"completions/max_terminated_length": 3537.0,
|
|
"completions/mean_length": 775.1760620117187,
|
|
"completions/mean_terminated_length": 786.0658081054687,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 215.4,
|
|
"epoch": 0.3119961000487494,
|
|
"grad_norm": 0.0003663992101792246,
|
|
"learning_rate": 2.349397590361446e-06,
|
|
"loss": -0.0105,
|
|
"num_tokens": 280013860.0,
|
|
"reward": 0.9698229193687439,
|
|
"reward_std": 0.14345374405384065,
|
|
"rewards/accuracy_reward": 0.6619791626930237,
|
|
"rewards/brier_reward": 0.7658138871192932,
|
|
"rewards/confidence_uniqueness_reward": 0.9374646186828614,
|
|
"rewards/format_reward": 0.9858506798744202,
|
|
"rewards/frontier_aurc_reward": -0.0014807431260123848,
|
|
"rewards/frontier_ece_reward": 0.008980327052995563,
|
|
"rewards/frontier_entropy_batch_reward": -0.2529941201210022,
|
|
"rewards/volume_coverage_0": -1.795900022444341e-10,
|
|
"rewards/volume_coverage_1": -1.795900022444341e-10,
|
|
"rewards/volume_coverage_10": 3.8563873101804536e-10,
|
|
"rewards/volume_coverage_15": 7.631051734957594e-10,
|
|
"rewards/volume_coverage_20": 2.1685917506175656e-09,
|
|
"rewards/volume_coverage_25": 3.7465909930034515e-09,
|
|
"rewards/volume_coverage_5": -1.795900022444341e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17249349057674407,
|
|
"signal/accuracy_reward/group_std_mean": 0.223880273103714,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3805555641651154,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08624674528837203,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08624674528837203,
|
|
"signal/advantage_abs_mean": 0.10982311069965363,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10982311069965363,
|
|
"signal/advantage_pre_scale_std": 0.16914137005805968,
|
|
"signal/advantage_std": 0.16914137005805968,
|
|
"signal/brier_reward/centered_abs_mean": 0.1637795925140381,
|
|
"signal/brier_reward/group_std_mean": 0.20636882185935973,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01637795865535736,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01637795865535736,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03288912586867809,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.051013688743114474,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003288912633433938,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003288912633433938,
|
|
"signal/format_reward/centered_abs_mean": 0.02181532122194767,
|
|
"signal/format_reward/group_std_mean": 0.037739118188619615,
|
|
"signal/format_reward/group_zero_std_frac": 0.8499999880790711,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010907660610973835,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010907660610973835,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015587236033752561,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0024250032845884563,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9484045697026887e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9484045697026887e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04549378007650375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.061597873270511624,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004549377970397473,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004549377970397473,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.314960116147995,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38633153438568113,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0314960103482008,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0314960103482008,
|
|
"signal/volume_coverage_0/centered_abs_mean": 8.251522298774461e-10,
|
|
"signal/volume_coverage_0/group_std_mean": 1.1054033466351142e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9638888955116272,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.251521826929675e-11,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 8.251521826929675e-11,
|
|
"signal/volume_coverage_1/centered_abs_mean": 8.251522298774461e-10,
|
|
"signal/volume_coverage_1/group_std_mean": 1.1054033466351142e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9638888955116272,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.251521826929675e-11,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 8.251521826929675e-11,
|
|
"signal/volume_coverage_10/centered_abs_mean": 2.2939792010401305e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 3.0097725312039357e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.9138888835906982,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.2939793103277096e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.2939793103277096e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 3.3565182355754787e-09,
|
|
"signal/volume_coverage_15/group_std_mean": 4.396137531781364e-09,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.8833333253860474,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.356518269923003e-10,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 3.356518269923003e-10,
|
|
"signal/volume_coverage_20/centered_abs_mean": 2.0850407000949645e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 2.83504333786766e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.7777778029441833,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 2.0850406567962663e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 2.0850406567962663e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 4.885320112890668e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 6.617217529480968e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.6944444656372071,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 4.885319837555358e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 4.885319837555358e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 8.251522298774461e-10,
|
|
"signal/volume_coverage_5/group_std_mean": 1.1054033466351142e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9638888955116272,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 8.251521826929675e-11,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 8.251521826929675e-11,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21908669961435798,
|
|
"calibration/batch_distribution_entropy": 0.9311462546505785,
|
|
"calibration/buffer_distribution_entropy": 0.9416415589799986,
|
|
"calibration/confidence_entropy": 0.4657020648208376,
|
|
"calibration/coverage@0%": 0.03239955997814936,
|
|
"calibration/coverage@1%": 0.03239955997814936,
|
|
"calibration/coverage@10%": 0.31323954224765294,
|
|
"calibration/coverage@15%": 0.36411866007444027,
|
|
"calibration/coverage@20%": 0.4666335057409091,
|
|
"calibration/coverage@25%": 0.6104226811960738,
|
|
"calibration/coverage@30%": 0.7054088223168888,
|
|
"calibration/coverage@5%": 0.21509016281502874,
|
|
"calibration/ece": 0.17209419891679043,
|
|
"calibration/mean_confidence": 0.6375476748236272,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011024305555555558,
|
|
"completions/max_length": 3435.4,
|
|
"completions/max_terminated_length": 3435.4,
|
|
"completions/mean_length": 770.680126953125,
|
|
"completions/mean_terminated_length": 779.2688720703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 214.0,
|
|
"epoch": 0.32399595005062437,
|
|
"grad_norm": 0.000423381949076429,
|
|
"learning_rate": 2.1987951807228917e-06,
|
|
"loss": -0.0083,
|
|
"num_tokens": 291985119.0,
|
|
"reward": 0.9828216791152954,
|
|
"reward_std": 0.13931744694709777,
|
|
"rewards/accuracy_reward": 0.6855034589767456,
|
|
"rewards/brier_reward": 0.775595772266388,
|
|
"rewards/confidence_uniqueness_reward": 0.9389704704284668,
|
|
"rewards/format_reward": 0.9888888955116272,
|
|
"rewards/frontier_aurc_reward": -0.0015716422349214554,
|
|
"rewards/frontier_ece_reward": 0.008903909381479025,
|
|
"rewards/frontier_entropy_batch_reward": -0.2670187473297119,
|
|
"rewards/volume_coverage_0": -1.3643474848154536e-11,
|
|
"rewards/volume_coverage_1": -1.3643474848154536e-11,
|
|
"rewards/volume_coverage_10": -3.4253438757278486e-11,
|
|
"rewards/volume_coverage_15": -3.859347159126253e-10,
|
|
"rewards/volume_coverage_20": 7.330478162849429e-10,
|
|
"rewards/volume_coverage_25": 1.211570410664109e-09,
|
|
"rewards/volume_coverage_5": -1.3643474848154536e-11,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16392686516046523,
|
|
"signal/accuracy_reward/group_std_mean": 0.21705200970172883,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38333333730697633,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08196343258023261,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08196343258023261,
|
|
"signal/advantage_abs_mean": 0.10422161519527436,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10422161519527436,
|
|
"signal/advantage_pre_scale_std": 0.1652237981557846,
|
|
"signal/advantage_std": 0.1652237981557846,
|
|
"signal/brier_reward/centered_abs_mean": 0.1610366255044937,
|
|
"signal/brier_reward/group_std_mean": 0.20538146793842316,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016103663109242917,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016103663109242917,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030462343245744705,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.048246100544929504,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030462343711405993,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030462343711405993,
|
|
"signal/format_reward/centered_abs_mean": 0.01842447929084301,
|
|
"signal/format_reward/group_std_mean": 0.03367025405168533,
|
|
"signal/format_reward/group_zero_std_frac": 0.8666666746139526,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009212239645421505,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009212239645421505,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018255119677633048,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002912291418761015,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2818900833954104e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2818900833954104e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04268218874931336,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05850542336702347,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004268218902871013,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004268218902871013,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31653188467025756,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38656463027000426,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03165318816900253,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03165318816900253,
|
|
"signal/volume_coverage_0/centered_abs_mean": 4.996748605157197e-10,
|
|
"signal/volume_coverage_0/group_std_mean": 6.554245639278644e-10,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9944444417953491,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 4.996748501073789e-11,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 4.996748501073789e-11,
|
|
"signal/volume_coverage_1/centered_abs_mean": 4.996748605157197e-10,
|
|
"signal/volume_coverage_1/group_std_mean": 6.554245639278644e-10,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9944444417953491,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 4.996748501073789e-11,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 4.996748501073789e-11,
|
|
"signal/volume_coverage_10/centered_abs_mean": 1.0024358822191547e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 1.3108416178908567e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.9666666746139526,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.002435957159209e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.002435957159209e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 2.1058243517002584e-09,
|
|
"signal/volume_coverage_15/group_std_mean": 2.7569074960431817e-09,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.925000011920929,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.1058243517002584e-10,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.1058243517002584e-10,
|
|
"signal/volume_coverage_20/centered_abs_mean": 7.328173268206228e-09,
|
|
"signal/volume_coverage_20/group_std_mean": 9.528951983028833e-09,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.8055555582046509,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 7.328173290410689e-10,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 7.328173290410689e-10,
|
|
"signal/volume_coverage_25/centered_abs_mean": 1.6980138717315187e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 2.177776963208089e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.7194444537162781,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 1.6980139905253821e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 1.6980139905253821e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 4.996748605157197e-10,
|
|
"signal/volume_coverage_5/group_std_mean": 6.554245639278644e-10,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9944444417953491,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.996748501073789e-11,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 4.996748501073789e-11,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18688488282444396,
|
|
"calibration/batch_distribution_entropy": 0.9612602498585673,
|
|
"calibration/buffer_distribution_entropy": 0.946935134946479,
|
|
"calibration/confidence_entropy": 0.4878221424685513,
|
|
"calibration/coverage@0%": 0.010068253342126169,
|
|
"calibration/coverage@1%": 0.010068253342126169,
|
|
"calibration/coverage@10%": 0.15910374872918792,
|
|
"calibration/coverage@15%": 0.449369630860614,
|
|
"calibration/coverage@20%": 0.559990037943999,
|
|
"calibration/coverage@25%": 0.8223586188910149,
|
|
"calibration/coverage@30%": 0.9565088720029576,
|
|
"calibration/coverage@5%": 0.09463446205940726,
|
|
"calibration/ece": 0.16027606652347887,
|
|
"calibration/mean_confidence": 0.588729914691588,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014236111111111116,
|
|
"completions/max_length": 3632.4,
|
|
"completions/max_terminated_length": 3632.4,
|
|
"completions/mean_length": 776.3033935546875,
|
|
"completions/mean_terminated_length": 787.4848510742188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 253.0,
|
|
"epoch": 0.33599580005249935,
|
|
"grad_norm": 0.00032712117535993457,
|
|
"learning_rate": 2.0481927710843377e-06,
|
|
"loss": -0.0098,
|
|
"num_tokens": 304032358.0,
|
|
"reward": 0.9728388667106629,
|
|
"reward_std": 0.13903119266033173,
|
|
"rewards/accuracy_reward": 0.6722222208976746,
|
|
"rewards/brier_reward": 0.7670191049575805,
|
|
"rewards/confidence_uniqueness_reward": 0.9365373611450195,
|
|
"rewards/format_reward": 0.9854166746139527,
|
|
"rewards/frontier_aurc_reward": -0.001542026223614812,
|
|
"rewards/frontier_ece_reward": 0.0071903283707797526,
|
|
"rewards/frontier_entropy_batch_reward": -0.2703594654798508,
|
|
"rewards/volume_coverage_0": -5.332067799443396e-11,
|
|
"rewards/volume_coverage_1": -5.332067799443396e-11,
|
|
"rewards/volume_coverage_10": -1.9090122772499884e-10,
|
|
"rewards/volume_coverage_15": -4.564347600398422e-10,
|
|
"rewards/volume_coverage_20": -6.339565656286739e-11,
|
|
"rewards/volume_coverage_25": -2.0095051789237318e-09,
|
|
"rewards/volume_coverage_5": -1.506728820703518e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15435112714767457,
|
|
"signal/accuracy_reward/group_std_mean": 0.20686088502407074,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07717556357383729,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07717556357383729,
|
|
"signal/advantage_abs_mean": 0.10415131747722625,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10415131747722625,
|
|
"signal/advantage_pre_scale_std": 0.16518069207668304,
|
|
"signal/advantage_std": 0.16518069207668304,
|
|
"signal/brier_reward/centered_abs_mean": 0.15760179460048676,
|
|
"signal/brier_reward/group_std_mean": 0.2008292406797409,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015760179981589316,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015760179981589316,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.033130045235157016,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05183272436261177,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033130045514553784,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033130045514553784,
|
|
"signal/format_reward/centered_abs_mean": 0.021896701864898206,
|
|
"signal/format_reward/group_std_mean": 0.038436245545744895,
|
|
"signal/format_reward/group_zero_std_frac": 0.850000011920929,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010948350932449103,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010948350932449103,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016865363577380776,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0028139258734881877,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.108170556311961e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.108170556311961e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.040233349055051805,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05556822866201401,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004023334989324212,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004023334989324212,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33216995000839233,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40116575360298157,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03321699649095535,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03321699649095535,
|
|
"signal/volume_coverage_0/centered_abs_mean": 5.590514454145712e-10,
|
|
"signal/volume_coverage_0/group_std_mean": 7.51527159253973e-10,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9833333373069764,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.590514960684967e-11,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 5.590514960684967e-11,
|
|
"signal/volume_coverage_1/centered_abs_mean": 5.590514454145712e-10,
|
|
"signal/volume_coverage_1/group_std_mean": 7.51527159253973e-10,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9833333373069764,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.590514960684967e-11,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 5.590514960684967e-11,
|
|
"signal/volume_coverage_10/centered_abs_mean": 1.7093801332745962e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 2.306350740249874e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.9416666626930237,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.7093802172352123e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.7093802172352123e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 3.268383494403615e-09,
|
|
"signal/volume_coverage_15/group_std_mean": 4.34988126907232e-09,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.9111111044883728,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.2683836219057906e-10,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 3.2683836219057906e-10,
|
|
"signal/volume_coverage_20/centered_abs_mean": 7.0756116826720115e-09,
|
|
"signal/volume_coverage_20/group_std_mean": 9.405933146244828e-09,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.8527777910232544,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 7.075611883726462e-10,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 7.075611883726462e-10,
|
|
"signal/volume_coverage_25/centered_abs_mean": 3.317895354015832e-08,
|
|
"signal/volume_coverage_25/group_std_mean": 4.440126604166039e-08,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.7277777791023254,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 3.317895497234602e-09,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 3.317895497234602e-09,
|
|
"signal/volume_coverage_5/centered_abs_mean": 1.0367738181860099e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 1.4120832135533501e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9722222208976745,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.036773857737705e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.036773857737705e-10,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.17861280601130797,
|
|
"calibration/batch_distribution_entropy": 0.9852219106359359,
|
|
"calibration/buffer_distribution_entropy": 0.9583941655173911,
|
|
"calibration/confidence_entropy": 0.5137185398950166,
|
|
"calibration/coverage@0%": 0.023177774456331256,
|
|
"calibration/coverage@1%": 0.023177774456331256,
|
|
"calibration/coverage@10%": 0.32692182191463137,
|
|
"calibration/coverage@15%": 0.4893533218131497,
|
|
"calibration/coverage@20%": 0.6518234649790168,
|
|
"calibration/coverage@25%": 0.7622667218699385,
|
|
"calibration/coverage@30%": 0.8429098343670999,
|
|
"calibration/coverage@5%": 0.10116614285484485,
|
|
"calibration/ece": 0.17461347138091166,
|
|
"calibration/mean_confidence": 0.4975835279157047,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01206597222222221,
|
|
"completions/max_length": 3274.2,
|
|
"completions/max_terminated_length": 3274.2,
|
|
"completions/mean_length": 787.4185913085937,
|
|
"completions/mean_terminated_length": 797.0513305664062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 265.0,
|
|
"epoch": 0.34799565005437433,
|
|
"grad_norm": 0.0003231425362173468,
|
|
"learning_rate": 1.8975903614457832e-06,
|
|
"loss": -0.0103,
|
|
"num_tokens": 316168028.0,
|
|
"reward": 0.9967975854873657,
|
|
"reward_std": 0.12998930513858795,
|
|
"rewards/accuracy_reward": 0.7133680582046509,
|
|
"rewards/brier_reward": 0.7639730095863342,
|
|
"rewards/confidence_uniqueness_reward": 0.9404920935630798,
|
|
"rewards/format_reward": 0.9879340171813965,
|
|
"rewards/frontier_aurc_reward": -0.001185076031833887,
|
|
"rewards/frontier_ece_reward": 0.0006924874149262905,
|
|
"rewards/frontier_entropy_batch_reward": -0.24353820085525513,
|
|
"rewards/volume_coverage_0": -3.5014671162514335e-10,
|
|
"rewards/volume_coverage_1": -3.5014671162514335e-10,
|
|
"rewards/volume_coverage_10": -1.1576934522139481e-09,
|
|
"rewards/volume_coverage_15": -8.61750595604338e-09,
|
|
"rewards/volume_coverage_20": -1.8163441106722188e-08,
|
|
"rewards/volume_coverage_25": -5.915410927265219e-06,
|
|
"rewards/volume_coverage_5": -3.5014671162514335e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.13902994692325593,
|
|
"signal/accuracy_reward/group_std_mean": 0.1906949073076248,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43055555820465086,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06951497346162797,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06951497346162797,
|
|
"signal/advantage_abs_mean": 0.094663205742836,
|
|
"signal/advantage_pre_scale_abs_mean": 0.094663205742836,
|
|
"signal/advantage_pre_scale_std": 0.1574291467666626,
|
|
"signal/advantage_std": 0.1574291467666626,
|
|
"signal/brier_reward/centered_abs_mean": 0.15350565910339356,
|
|
"signal/brier_reward/group_std_mean": 0.19510267674922943,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015350566431879997,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015350566431879997,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03119339495897293,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.050634662806987765,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003119339654222131,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003119339654222131,
|
|
"signal/format_reward/centered_abs_mean": 0.02065429650247097,
|
|
"signal/format_reward/group_std_mean": 0.03805244565010071,
|
|
"signal/format_reward/group_zero_std_frac": 0.8444444537162781,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010327148251235485,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010327148251235485,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013626172440126538,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022952609695494177,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7032716095854993e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7032716095854993e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03810315653681755,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.051458243280649185,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00381031590513885,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00381031590513885,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32137046456336976,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3931168556213379,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0321370467543602,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0321370467543602,
|
|
"signal/volume_coverage_0/centered_abs_mean": 2.1893566043429403e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 2.8316428202224133e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9027777791023255,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.1893565779751434e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 2.1893565779751434e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 2.1893566043429403e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 2.8316428202224133e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9027777791023255,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.1893565779751434e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 2.1893565779751434e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 4.7582652901168035e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 6.146304565302074e-09,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.8388888955116272,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.758265607918144e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 4.758265607918144e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 3.597350177741987e-08,
|
|
"signal/volume_coverage_15/group_std_mean": 4.588340245526012e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.7333333373069764,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 3.597350033135438e-09,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 3.597350033135438e-09,
|
|
"signal/volume_coverage_20/centered_abs_mean": 7.640569981148815e-08,
|
|
"signal/volume_coverage_20/group_std_mean": 9.747053044684151e-08,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.6500000059604645,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 7.64056946600533e-09,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 7.64056946600533e-09,
|
|
"signal/volume_coverage_25/centered_abs_mean": 2.985676190974118e-05,
|
|
"signal/volume_coverage_25/group_std_mean": 3.802538181005843e-05,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.4666666775941849,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 2.98567600873767e-06,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 2.98567600873767e-06,
|
|
"signal/volume_coverage_5/centered_abs_mean": 2.1893566043429403e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 2.8316428202224133e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9027777791023255,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.1893565779751434e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.1893565779751434e-10,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18029917566613354,
|
|
"calibration/batch_distribution_entropy": 0.9609287430066871,
|
|
"calibration/buffer_distribution_entropy": 0.9680553694533005,
|
|
"calibration/confidence_entropy": 0.5266669694591191,
|
|
"calibration/coverage@0%": 0.045315954372948296,
|
|
"calibration/coverage@1%": 0.045315954372948296,
|
|
"calibration/coverage@10%": 0.42797529882281715,
|
|
"calibration/coverage@15%": 0.5210627786776989,
|
|
"calibration/coverage@20%": 0.5992149526139929,
|
|
"calibration/coverage@25%": 0.6996878254078915,
|
|
"calibration/coverage@30%": 0.7548685125189165,
|
|
"calibration/coverage@5%": 0.17942530214136124,
|
|
"calibration/ece": 0.1919776915795625,
|
|
"calibration/mean_confidence": 0.5383348858104606,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01128472222222221,
|
|
"completions/max_length": 3553.8,
|
|
"completions/max_terminated_length": 3553.8,
|
|
"completions/mean_length": 858.7107666015625,
|
|
"completions/mean_terminated_length": 868.437451171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 257.2,
|
|
"epoch": 0.3599955000562493,
|
|
"grad_norm": 0.00037785363383591175,
|
|
"learning_rate": 1.7469879518072292e-06,
|
|
"loss": -0.0076,
|
|
"num_tokens": 329170712.0,
|
|
"reward": 0.9839182257652282,
|
|
"reward_std": 0.13611459136009216,
|
|
"rewards/accuracy_reward": 0.6901041626930237,
|
|
"rewards/brier_reward": 0.7852767705917358,
|
|
"rewards/confidence_uniqueness_reward": 0.9386167883872986,
|
|
"rewards/format_reward": 0.9887152671813965,
|
|
"rewards/frontier_aurc_reward": -0.0013611266971565784,
|
|
"rewards/frontier_ece_reward": 0.00399660924449563,
|
|
"rewards/frontier_entropy_batch_reward": -0.2828936755657196,
|
|
"rewards/volume_coverage_0": -8.655792888001557e-10,
|
|
"rewards/volume_coverage_1": -8.655792888001557e-10,
|
|
"rewards/volume_coverage_10": -1.4397807245047912e-09,
|
|
"rewards/volume_coverage_15": -1.377830247117906e-09,
|
|
"rewards/volume_coverage_20": 1.7711275204335131e-06,
|
|
"rewards/volume_coverage_25": 0.0002565411617979407,
|
|
"rewards/volume_coverage_5": -1.2134155374932121e-09,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15776909440755843,
|
|
"signal/accuracy_reward/group_std_mean": 0.20477839410305024,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43611112236976624,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07888454720377922,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07888454720377922,
|
|
"signal/advantage_abs_mean": 0.10334948301315308,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10334948301315308,
|
|
"signal/advantage_pre_scale_std": 0.16254353821277617,
|
|
"signal/advantage_std": 0.16254353821277617,
|
|
"signal/brier_reward/centered_abs_mean": 0.14287383258342742,
|
|
"signal/brier_reward/group_std_mean": 0.18298307061195374,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014287383668124676,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014287383668124676,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029984532669186593,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.047179107740521434,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002998453238978982,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002998453238978982,
|
|
"signal/format_reward/centered_abs_mean": 0.017936197854578496,
|
|
"signal/format_reward/group_std_mean": 0.032711121067404744,
|
|
"signal/format_reward/group_zero_std_frac": 0.8694444537162781,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008968098927289248,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008968098927289248,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016798900673165918,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002934660855680704,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0998626496293582e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0998626496293582e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03310015164315701,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04407154768705368,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003310015145689249,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003310015145689249,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34080212712287905,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41118053793907167,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03408021330833435,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03408021330833435,
|
|
"signal/volume_coverage_0/centered_abs_mean": 3.149529201329493e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 4.050445145953319e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.8833333373069763,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 3.149529425455766e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 3.149529425455766e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 3.149529201329493e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 4.050445145953319e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.8833333373069763,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 3.149529425455766e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 3.149529425455766e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 7.936655319062957e-09,
|
|
"signal/volume_coverage_10/group_std_mean": 1.0478954781678596e-08,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.7555555641651154,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 7.936655232326784e-10,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 7.936655232326784e-10,
|
|
"signal/volume_coverage_15/centered_abs_mean": 1.818619645987951e-08,
|
|
"signal/volume_coverage_15/group_std_mean": 2.396414628957899e-08,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.6500000059604645,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.8186196201752658e-09,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.8186196201752658e-09,
|
|
"signal/volume_coverage_20/centered_abs_mean": 4.4338119351783334e-05,
|
|
"signal/volume_coverage_20/group_std_mean": 5.8473115660206965e-05,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.5638888895511627,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 4.433812225967948e-06,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 4.433812225967948e-06,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.0035675803665071726,
|
|
"signal/volume_coverage_25/group_std_mean": 0.004789471440017223,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.03333333432674408,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0003567580570233986,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0003567580570233986,
|
|
"signal/volume_coverage_5/centered_abs_mean": 3.919237409610865e-09,
|
|
"signal/volume_coverage_5/group_std_mean": 5.091148846947391e-09,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.8777777791023255,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.9192375005103754e-10,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 3.9192375005103754e-10,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.3599955000562493,
|
|
"eval_calibration/aurc": 0.164208079490717,
|
|
"eval_calibration/batch_distribution_entropy": 0.8908815670220108,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9725765790521952,
|
|
"eval_calibration/confidence_entropy": 0.5328010527649214,
|
|
"eval_calibration/coverage@0%": 0.2471438172043011,
|
|
"eval_calibration/coverage@1%": 0.2471438172043011,
|
|
"eval_calibration/coverage@10%": 0.4008736559139785,
|
|
"eval_calibration/coverage@15%": 0.5078965053763441,
|
|
"eval_calibration/coverage@20%": 0.7123655913978495,
|
|
"eval_calibration/coverage@25%": 0.8172043010752689,
|
|
"eval_calibration/coverage@30%": 0.9375,
|
|
"eval_calibration/coverage@5%": 0.2471438172043011,
|
|
"eval_calibration/ece": 0.1731811553540427,
|
|
"eval_calibration/mean_confidence": 0.6129084461244519,
|
|
"eval_completions/clipped_ratio": 0.008680555555555544,
|
|
"eval_completions/max_length": 3040.5,
|
|
"eval_completions/max_terminated_length": 3040.5,
|
|
"eval_completions/mean_length": 835.7632242838541,
|
|
"eval_completions/mean_terminated_length": 843.113779703776,
|
|
"eval_completions/min_length": 73.33333333333333,
|
|
"eval_completions/min_terminated_length": 332.3333333333333,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 329170712.0,
|
|
"eval_reward": 0.9062922497590383,
|
|
"eval_reward_std": 0.24627330154180527,
|
|
"eval_rewards/accuracy_reward": 0.683159718910853,
|
|
"eval_rewards/brier_reward": 0.7905897796154022,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8871917724609375,
|
|
"eval_rewards/format_reward": 0.9904513855775198,
|
|
"eval_rewards/frontier_aurc_reward": -0.0017877337328779201,
|
|
"eval_rewards/frontier_ece_reward": 0.005614791589323431,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9904513855775198,
|
|
"eval_rewards/volume_coverage_0": -2.907263158512252e-10,
|
|
"eval_rewards/volume_coverage_1": -2.907263158512252e-10,
|
|
"eval_rewards/volume_coverage_10": -5.503734481009523e-11,
|
|
"eval_rewards/volume_coverage_15": 2.1709352926446712e-09,
|
|
"eval_rewards/volume_coverage_20": 1.7207810086479942e-05,
|
|
"eval_rewards/volume_coverage_25": 0.002128113391033063,
|
|
"eval_rewards/volume_coverage_5": -2.907263158512252e-10,
|
|
"eval_runtime": 198.2371,
|
|
"eval_samples_per_second": 5.044,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4220377554496129,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4656520187854767,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21101887772480646,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21101887772480646,
|
|
"eval_signal/advantage_abs_mean": 0.21716739485661188,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21716739485661188,
|
|
"eval_signal/advantage_pre_scale_std": 0.2442422236005465,
|
|
"eval_signal/advantage_std": 0.2442422236005465,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.17609463135401407,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2297411933541298,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017609463073313236,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.017609463073313236,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05053987664481004,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0819082868595918,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005053987881789605,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005053987881789605,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.018391926928112905,
|
|
"eval_signal/format_reward/group_std_mean": 0.051025692063073315,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.722222238779068,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009195963464056453,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.009195963464056453,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0030478331997680166,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.006132548054059346,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.809791511836617e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.809791511836617e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.03784426177541415,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.053607478737831116,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037844261775414148,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037844261775414148,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.018391926928112905,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.051025692063073315,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.722222238779068,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0018391927393774192,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0018391927393774192,
|
|
"eval_signal/volume_coverage_0/centered_abs_mean": 2.2895876074698527e-09,
|
|
"eval_signal/volume_coverage_0/group_std_mean": 3.2587387001810817e-09,
|
|
"eval_signal/volume_coverage_0/group_zero_std_frac": 0.8888889153798422,
|
|
"eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 2.2895876947842675e-10,
|
|
"eval_signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_0/weighted_centered_abs_mean": 2.2895876947842675e-10,
|
|
"eval_signal/volume_coverage_1/centered_abs_mean": 2.2895876074698527e-09,
|
|
"eval_signal/volume_coverage_1/group_std_mean": 3.2587387001810817e-09,
|
|
"eval_signal/volume_coverage_1/group_zero_std_frac": 0.8888889153798422,
|
|
"eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 2.2895876947842675e-10,
|
|
"eval_signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_1/weighted_centered_abs_mean": 2.2895876947842675e-10,
|
|
"eval_signal/volume_coverage_10/centered_abs_mean": 1.1983273628020838e-08,
|
|
"eval_signal/volume_coverage_10/group_std_mean": 1.7429221228308183e-08,
|
|
"eval_signal/volume_coverage_10/group_zero_std_frac": 0.7777777910232544,
|
|
"eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.1983274335209775e-09,
|
|
"eval_signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_10/weighted_centered_abs_mean": 1.1983274335209775e-09,
|
|
"eval_signal/volume_coverage_15/centered_abs_mean": 5.853292239788033e-08,
|
|
"eval_signal/volume_coverage_15/group_std_mean": 8.00400753268408e-08,
|
|
"eval_signal/volume_coverage_15/group_zero_std_frac": 0.5277777860562006,
|
|
"eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.853291987674887e-09,
|
|
"eval_signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_15/weighted_centered_abs_mean": 5.853291987674887e-09,
|
|
"eval_signal/volume_coverage_20/centered_abs_mean": 0.0003371686664953207,
|
|
"eval_signal/volume_coverage_20/group_std_mean": 0.00047656046808697283,
|
|
"eval_signal/volume_coverage_20/group_zero_std_frac": 0.1944444477558136,
|
|
"eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 3.371686761965975e-05,
|
|
"eval_signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_20/weighted_centered_abs_mean": 3.371686761965975e-05,
|
|
"eval_signal/volume_coverage_25/centered_abs_mean": 0.010561376344412565,
|
|
"eval_signal/volume_coverage_25/group_std_mean": 0.016184291957567137,
|
|
"eval_signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00105613767906713,
|
|
"eval_signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_25/weighted_centered_abs_mean": 0.00105613767906713,
|
|
"eval_signal/volume_coverage_5/centered_abs_mean": 2.2895876074698527e-09,
|
|
"eval_signal/volume_coverage_5/group_std_mean": 3.2587387001810817e-09,
|
|
"eval_signal/volume_coverage_5/group_zero_std_frac": 0.8888889153798422,
|
|
"eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.2895876947842675e-10,
|
|
"eval_signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_5/weighted_centered_abs_mean": 2.2895876947842675e-10,
|
|
"eval_steps_per_second": 0.03,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18438420495247035,
|
|
"calibration/batch_distribution_entropy": 0.9603641675120886,
|
|
"calibration/buffer_distribution_entropy": 0.9744252500060735,
|
|
"calibration/confidence_entropy": 0.5168124227269436,
|
|
"calibration/coverage@0%": 0.013314838594667516,
|
|
"calibration/coverage@1%": 0.013314838594667516,
|
|
"calibration/coverage@10%": 0.21615471907211553,
|
|
"calibration/coverage@15%": 0.4260975711864175,
|
|
"calibration/coverage@20%": 0.7192844677137871,
|
|
"calibration/coverage@25%": 0.8029532068062826,
|
|
"calibration/coverage@30%": 0.838001745200698,
|
|
"calibration/coverage@5%": 0.10868268600611168,
|
|
"calibration/ece": 0.20437914411274316,
|
|
"calibration/mean_confidence": 0.5928247329203169,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009461805555555558,
|
|
"completions/max_length": 3466.8,
|
|
"completions/max_terminated_length": 3466.8,
|
|
"completions/mean_length": 815.3238647460937,
|
|
"completions/mean_terminated_length": 823.2176879882812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 269.6,
|
|
"epoch": 0.3719953500581243,
|
|
"grad_norm": 0.0003626790421549231,
|
|
"learning_rate": 1.5963855421686747e-06,
|
|
"loss": -0.0074,
|
|
"num_tokens": 341670955.0,
|
|
"reward": 1.007612144947052,
|
|
"reward_std": 0.1327279031276703,
|
|
"rewards/accuracy_reward": 0.7348090291023255,
|
|
"rewards/brier_reward": 0.7994078278541565,
|
|
"rewards/confidence_uniqueness_reward": 0.9393423318862915,
|
|
"rewards/format_reward": 0.9905381917953491,
|
|
"rewards/frontier_aurc_reward": -0.0014131779549643396,
|
|
"rewards/frontier_ece_reward": 0.0025281490292400123,
|
|
"rewards/frontier_entropy_batch_reward": -0.29814456701278685,
|
|
"rewards/volume_coverage_0": -8.647244968684742e-11,
|
|
"rewards/volume_coverage_1": -8.647244968684742e-11,
|
|
"rewards/volume_coverage_10": -5.2008233206168875e-09,
|
|
"rewards/volume_coverage_15": -1.23952112457415e-07,
|
|
"rewards/volume_coverage_20": -0.0001249164422915783,
|
|
"rewards/volume_coverage_25": 0.00655297446064651,
|
|
"rewards/volume_coverage_5": -8.647244968684742e-11,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15657009482383727,
|
|
"signal/accuracy_reward/group_std_mean": 0.203624826669693,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07828504741191863,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07828504741191863,
|
|
"signal/advantage_abs_mean": 0.10121837258338928,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10121837258338928,
|
|
"signal/advantage_pre_scale_std": 0.16051416099071503,
|
|
"signal/advantage_std": 0.16051416099071503,
|
|
"signal/brier_reward/centered_abs_mean": 0.1417425900697708,
|
|
"signal/brier_reward/group_std_mean": 0.18280084431171417,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014174258708953858,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014174258708953858,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028596949204802512,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04525943174958229,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002859694929793477,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002859694929793477,
|
|
"signal/format_reward/centered_abs_mean": 0.016194661520421504,
|
|
"signal/format_reward/group_std_mean": 0.030201531946659088,
|
|
"signal/format_reward/group_zero_std_frac": 0.8777777910232544,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008097330760210752,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008097330760210752,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002045383723452687,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035399315878748895,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.55672955972841e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.55672955972841e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.030501941591501235,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.040661169588565825,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003050194028764963,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003050194028764963,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3381875276565552,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40684131979942323,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03381875231862068,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03381875231862068,
|
|
"signal/volume_coverage_0/centered_abs_mean": 5.851382822719131e-10,
|
|
"signal/volume_coverage_0/group_std_mean": 7.571845983544989e-10,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9944444417953491,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.851382954558116e-11,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 5.851382954558116e-11,
|
|
"signal/volume_coverage_1/centered_abs_mean": 5.851382822719131e-10,
|
|
"signal/volume_coverage_1/group_std_mean": 7.571845983544989e-10,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9944444417953491,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.851382954558116e-11,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 5.851382954558116e-11,
|
|
"signal/volume_coverage_10/centered_abs_mean": 1.061550365388797e-08,
|
|
"signal/volume_coverage_10/group_std_mean": 1.4323466168697507e-08,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.825,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 1.0615504031363798e-09,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 1.0615504031363798e-09,
|
|
"signal/volume_coverage_15/centered_abs_mean": 2.943361390173038e-07,
|
|
"signal/volume_coverage_15/group_std_mean": 3.8099229167087855e-07,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.622222226858139,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 2.943361464557981e-08,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 2.943361464557981e-08,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.0009150244877673686,
|
|
"signal/volume_coverage_20/group_std_mean": 0.0012456974247470499,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.11111111417412758,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 9.150245241471567e-05,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 9.150245241471567e-05,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.0145043870434165,
|
|
"signal/volume_coverage_25/group_std_mean": 0.019368264079093932,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.0014504387276247145,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.0014504387276247145,
|
|
"signal/volume_coverage_5/centered_abs_mean": 5.851382822719131e-10,
|
|
"signal/volume_coverage_5/group_std_mean": 7.571845983544989e-10,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9944444417953491,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 5.851382954558116e-11,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 5.851382954558116e-11,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1466230797636561,
|
|
"calibration/batch_distribution_entropy": 0.964288356863465,
|
|
"calibration/buffer_distribution_entropy": 0.9780273636730223,
|
|
"calibration/confidence_entropy": 0.5103002408452058,
|
|
"calibration/coverage@0%": 0.12145725031133525,
|
|
"calibration/coverage@1%": 0.1261569892147295,
|
|
"calibration/coverage@10%": 0.47141484462912897,
|
|
"calibration/coverage@15%": 0.7015038387175796,
|
|
"calibration/coverage@20%": 0.7837500349973402,
|
|
"calibration/coverage@25%": 0.823630204104488,
|
|
"calibration/coverage@30%": 0.8379679144385026,
|
|
"calibration/coverage@5%": 0.29739208200252454,
|
|
"calibration/ece": 0.1881009639784636,
|
|
"calibration/mean_confidence": 0.5813083634195351,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015364583333333326,
|
|
"completions/max_length": 3313.0,
|
|
"completions/max_terminated_length": 3313.0,
|
|
"completions/mean_length": 827.1549682617188,
|
|
"completions/mean_terminated_length": 840.0288940429688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 265.4,
|
|
"epoch": 0.38399520005999926,
|
|
"grad_norm": 0.0003019192081410438,
|
|
"learning_rate": 1.4457831325301204e-06,
|
|
"loss": -0.0119,
|
|
"num_tokens": 354287076.0,
|
|
"reward": 0.9753640413284301,
|
|
"reward_std": 0.13932546079158784,
|
|
"rewards/accuracy_reward": 0.6730034708976745,
|
|
"rewards/brier_reward": 0.7690565705299377,
|
|
"rewards/confidence_uniqueness_reward": 0.9359739542007446,
|
|
"rewards/format_reward": 0.9844618082046509,
|
|
"rewards/frontier_aurc_reward": -0.0019856867846101524,
|
|
"rewards/frontier_ece_reward": 0.0022318214061670004,
|
|
"rewards/frontier_entropy_batch_reward": -0.25475322306156156,
|
|
"rewards/volume_coverage_0": -6.172270767407229e-11,
|
|
"rewards/volume_coverage_1": -6.172270767407229e-11,
|
|
"rewards/volume_coverage_10": -2.8393437445650704e-08,
|
|
"rewards/volume_coverage_15": -4.993317070045577e-07,
|
|
"rewards/volume_coverage_20": 0.0004009470983874053,
|
|
"rewards/volume_coverage_25": 0.013652586936950683,
|
|
"rewards/volume_coverage_5": 1.4168433531795699e-08,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1557996988296509,
|
|
"signal/accuracy_reward/group_std_mean": 0.20581969022750854,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07789984941482545,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07789984941482545,
|
|
"signal/advantage_abs_mean": 0.10422182083129883,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10422182083129883,
|
|
"signal/advantage_pre_scale_std": 0.16794657409191133,
|
|
"signal/advantage_std": 0.16794657409191133,
|
|
"signal/brier_reward/centered_abs_mean": 0.15780293941497803,
|
|
"signal/brier_reward/group_std_mean": 0.20066075921058654,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015780294500291347,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015780294500291347,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03549051396548748,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05435318723320961,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003549051284790039,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003549051284790039,
|
|
"signal/format_reward/centered_abs_mean": 0.0249620221555233,
|
|
"signal/format_reward/group_std_mean": 0.04160917028784752,
|
|
"signal/format_reward/group_zero_std_frac": 0.844444465637207,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01248101107776165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01248101107776165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002445269119925797,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0041489469353109595,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.056586501770653e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.056586501770653e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03229519799351692,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.042399514466524124,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032295198645442722,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032295198645442722,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3162634313106537,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38911319971084596,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03162634521722794,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03162634521722794,
|
|
"signal/volume_coverage_0/centered_abs_mean": 5.618044016109103e-10,
|
|
"signal/volume_coverage_0/group_std_mean": 7.310222538414734e-10,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.9972222208976745,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.618044328359329e-11,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 5.618044328359329e-11,
|
|
"signal/volume_coverage_1/centered_abs_mean": 5.618044016109103e-10,
|
|
"signal/volume_coverage_1/group_std_mean": 7.310222538414734e-10,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.9972222208976745,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.618044328359329e-11,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 5.618044328359329e-11,
|
|
"signal/volume_coverage_10/centered_abs_mean": 3.5007122471952813e-07,
|
|
"signal/volume_coverage_10/group_std_mean": 4.474250966168825e-07,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.775,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.5007123339036994e-08,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.5007123339036994e-08,
|
|
"signal/volume_coverage_15/centered_abs_mean": 7.844799370104738e-06,
|
|
"signal/volume_coverage_15/group_std_mean": 1.0182519690715709e-05,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.3694444507360458,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 7.844799313261319e-07,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 7.844799313261319e-07,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.0039816807955503465,
|
|
"signal/volume_coverage_20/group_std_mean": 0.005318196397274733,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0003981680842116475,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.0003981680842116475,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.023230988532304764,
|
|
"signal/volume_coverage_25/group_std_mean": 0.030292440578341483,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.002323098946362734,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.002323098946362734,
|
|
"signal/volume_coverage_5/centered_abs_mean": 1.1731535911468916e-07,
|
|
"signal/volume_coverage_5/group_std_mean": 1.4786015383183583e-07,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.9305555582046509,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.1731536444029022e-08,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.1731536444029022e-08,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1546312380703148,
|
|
"calibration/batch_distribution_entropy": 0.9767267361123911,
|
|
"calibration/buffer_distribution_entropy": 0.9821229946566788,
|
|
"calibration/confidence_entropy": 0.48704984961187714,
|
|
"calibration/coverage@0%": 0.025887868601684395,
|
|
"calibration/coverage@1%": 0.025887868601684395,
|
|
"calibration/coverage@10%": 0.5159800660623028,
|
|
"calibration/coverage@15%": 0.5914674495924496,
|
|
"calibration/coverage@20%": 0.6686515086515087,
|
|
"calibration/coverage@25%": 0.7135845778505059,
|
|
"calibration/coverage@30%": 0.9001099239326387,
|
|
"calibration/coverage@5%": 0.26043661377542954,
|
|
"calibration/ece": 0.2139093327163634,
|
|
"calibration/mean_confidence": 0.5058844434572112,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017447916666666653,
|
|
"completions/max_length": 3554.0,
|
|
"completions/max_terminated_length": 3554.0,
|
|
"completions/mean_length": 868.8214477539062,
|
|
"completions/mean_terminated_length": 884.3752075195313,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 287.0,
|
|
"epoch": 0.39599505006187424,
|
|
"grad_norm": 0.0003124874783679843,
|
|
"learning_rate": 1.2951807228915664e-06,
|
|
"loss": -0.0099,
|
|
"num_tokens": 367434971.0,
|
|
"reward": 0.9755571007728576,
|
|
"reward_std": 0.1345706507563591,
|
|
"rewards/accuracy_reward": 0.6695312619209289,
|
|
"rewards/brier_reward": 0.7570839524269104,
|
|
"rewards/confidence_uniqueness_reward": 0.9359697103500366,
|
|
"rewards/format_reward": 0.9825520753860474,
|
|
"rewards/frontier_aurc_reward": -0.0016420065890997647,
|
|
"rewards/frontier_ece_reward": 0.0011796611128374935,
|
|
"rewards/frontier_entropy_batch_reward": -0.2225494861602783,
|
|
"rewards/volume_coverage_0": -2.2794013121441027e-09,
|
|
"rewards/volume_coverage_1": -2.2794013121441027e-09,
|
|
"rewards/volume_coverage_10": -5.1920217991607843e-08,
|
|
"rewards/volume_coverage_15": 1.054141050360613e-06,
|
|
"rewards/volume_coverage_20": 0.0007828456378774718,
|
|
"rewards/volume_coverage_25": 0.022891780361533165,
|
|
"rewards/volume_coverage_5": -5.379716716547023e-09,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14666341245174408,
|
|
"signal/accuracy_reward/group_std_mean": 0.19849115908145903,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4055555522441864,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07333170622587204,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07333170622587204,
|
|
"signal/advantage_abs_mean": 0.09981575608253479,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09981575608253479,
|
|
"signal/advantage_pre_scale_std": 0.16138841807842255,
|
|
"signal/advantage_std": 0.16138841807842255,
|
|
"signal/brier_reward/centered_abs_mean": 0.166391322016716,
|
|
"signal/brier_reward/group_std_mean": 0.21182333827018737,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01663913168013096,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01663913168013096,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0341212198138237,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05267147943377495,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034121218603104355,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034121218603104355,
|
|
"signal/format_reward/centered_abs_mean": 0.02448459193110466,
|
|
"signal/format_reward/group_std_mean": 0.041171152144670486,
|
|
"signal/format_reward/group_zero_std_frac": 0.8444444537162781,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01224229596555233,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01224229596555233,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021314293844625354,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0037278625182807445,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6642868033377453e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6642868033377453e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03538916334509849,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.045856249332427976,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003538916353136301,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003538916353136301,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29995506405830386,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37324848771095276,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029995508119463922,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029995508119463922,
|
|
"signal/volume_coverage_0/centered_abs_mean": 8.71896759235824e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 1.1290637758065714e-08,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.7361111164093017,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 8.718967769993924e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 8.718967769993924e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 8.71896759235824e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 1.1290637758065714e-08,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.7361111164093017,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 8.718967769993924e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 8.718967769993924e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 5.929277968164115e-07,
|
|
"signal/volume_coverage_10/group_std_mean": 7.720077775275058e-07,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.3805555671453476,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 5.929278206195931e-08,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 5.929278206195931e-08,
|
|
"signal/volume_coverage_15/centered_abs_mean": 1.5034493299026507e-05,
|
|
"signal/volume_coverage_15/group_std_mean": 1.9472881831461564e-05,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.21388889122754334,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.5034493799248593e-06,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.5034493799248593e-06,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.007102501392364502,
|
|
"signal/volume_coverage_20/group_std_mean": 0.009295590687543154,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0007102501345798373,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.0007102501345798373,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.03103804439306259,
|
|
"signal/volume_coverage_25/group_std_mean": 0.03991614505648613,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.003103804448619485,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.003103804448619485,
|
|
"signal/volume_coverage_5/centered_abs_mean": 2.153096625434614e-08,
|
|
"signal/volume_coverage_5/group_std_mean": 2.8507457550119853e-08,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.6916666746139526,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 2.15309672313424e-09,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 2.15309672313424e-09,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.17371525972343724,
|
|
"calibration/batch_distribution_entropy": 0.9461858305303827,
|
|
"calibration/buffer_distribution_entropy": 0.9854462094498718,
|
|
"calibration/confidence_entropy": 0.4846643375245122,
|
|
"calibration/coverage@0%": 0.010455004351610096,
|
|
"calibration/coverage@1%": 0.010455004351610096,
|
|
"calibration/coverage@10%": 0.35906587241032534,
|
|
"calibration/coverage@15%": 0.4774973328013402,
|
|
"calibration/coverage@20%": 0.6729397947160021,
|
|
"calibration/coverage@25%": 0.7635555132449945,
|
|
"calibration/coverage@30%": 0.8675021758050478,
|
|
"calibration/coverage@5%": 0.09661688424717145,
|
|
"calibration/ece": 0.1664582589769637,
|
|
"calibration/mean_confidence": 0.6071375677769686,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011371527777777769,
|
|
"completions/max_length": 3648.4,
|
|
"completions/max_terminated_length": 3648.4,
|
|
"completions/mean_length": 863.1968872070313,
|
|
"completions/mean_terminated_length": 873.1014892578125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 278.6,
|
|
"epoch": 0.4079949000637492,
|
|
"grad_norm": 0.0003431806981097907,
|
|
"learning_rate": 1.1445783132530121e-06,
|
|
"loss": -0.0085,
|
|
"num_tokens": 380468183.0,
|
|
"reward": 1.0040875792503356,
|
|
"reward_std": 0.1347779542207718,
|
|
"rewards/accuracy_reward": 0.7197048664093018,
|
|
"rewards/brier_reward": 0.7805811762809753,
|
|
"rewards/confidence_uniqueness_reward": 0.9393272161483764,
|
|
"rewards/format_reward": 0.9886284828186035,
|
|
"rewards/frontier_aurc_reward": -0.0017156409798189999,
|
|
"rewards/frontier_ece_reward": -0.0007315105176530778,
|
|
"rewards/frontier_entropy_batch_reward": -0.2580687701702118,
|
|
"rewards/volume_coverage_0": -1.211497779873838e-09,
|
|
"rewards/volume_coverage_1": -1.211497779873838e-09,
|
|
"rewards/volume_coverage_10": -8.929526549295019e-08,
|
|
"rewards/volume_coverage_15": -1.3116566924509243e-05,
|
|
"rewards/volume_coverage_20": 0.0010870593221625313,
|
|
"rewards/volume_coverage_25": 0.037241144105792046,
|
|
"rewards/volume_coverage_5": -1.2136919802507862e-09,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14935438632965087,
|
|
"signal/accuracy_reward/group_std_mean": 0.20225562751293183,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07467719316482543,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07467719316482543,
|
|
"signal/advantage_abs_mean": 0.09886526316404343,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09886526316404343,
|
|
"signal/advantage_pre_scale_std": 0.16056418120861055,
|
|
"signal/advantage_std": 0.16056418120861055,
|
|
"signal/brier_reward/centered_abs_mean": 0.1546865701675415,
|
|
"signal/brier_reward/group_std_mean": 0.19657468795776367,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015468657575547695,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015468657575547695,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03084472641348839,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05063636749982834,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030844727531075477,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030844727531075477,
|
|
"signal/format_reward/centered_abs_mean": 0.019156900979578496,
|
|
"signal/format_reward/group_std_mean": 0.03671438507735729,
|
|
"signal/format_reward/group_zero_std_frac": 0.8472222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009578450489789248,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009578450489789248,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002360010566189885,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004065482737496495,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9500132950488477e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9500132950488477e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.031055760383605958,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04056341871619225,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003105575917288661,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003105575917288661,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32181860208511354,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.391850072145462,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03218186013400555,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03218186013400555,
|
|
"signal/volume_coverage_0/centered_abs_mean": 9.236164411063897e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 1.196727463792513e-08,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.7083333373069763,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 9.236164610904041e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 9.236164610904041e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 9.236164411063897e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 1.196727463792513e-08,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.7083333373069763,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 9.236164610904041e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 9.236164610904041e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 5.15992598248971e-07,
|
|
"signal/volume_coverage_10/group_std_mean": 6.642203786100253e-07,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.4499999970197678,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 5.159925713371649e-08,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 5.159925713371649e-08,
|
|
"signal/volume_coverage_15/centered_abs_mean": 5.2554698049789296e-05,
|
|
"signal/volume_coverage_15/group_std_mean": 7.018936448730529e-05,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.16666666865348817,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 5.25546981862135e-06,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 5.25546981862135e-06,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.008253774605691433,
|
|
"signal/volume_coverage_20/group_std_mean": 0.010948755592107774,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0008253774838522076,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.0008253774838522076,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.03802705928683281,
|
|
"signal/volume_coverage_25/group_std_mean": 0.04884639978408813,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.003802705928683281,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.003802705928683281,
|
|
"signal/volume_coverage_5/centered_abs_mean": 1.2833939466716515e-08,
|
|
"signal/volume_coverage_5/group_std_mean": 1.6756578347099094e-08,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.6888888955116272,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.2833940021828028e-09,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.2833940021828028e-09,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.140991436437331,
|
|
"calibration/batch_distribution_entropy": 0.9774483483818164,
|
|
"calibration/buffer_distribution_entropy": 0.9877175559458158,
|
|
"calibration/confidence_entropy": 0.5059720102009231,
|
|
"calibration/coverage@0%": 0.045580104973068296,
|
|
"calibration/coverage@1%": 0.10364462110210056,
|
|
"calibration/coverage@10%": 0.3414710206091299,
|
|
"calibration/coverage@15%": 0.6232919464639679,
|
|
"calibration/coverage@20%": 0.8037394190618983,
|
|
"calibration/coverage@25%": 0.8891310954551734,
|
|
"calibration/coverage@30%": 0.9374775521480867,
|
|
"calibration/coverage@5%": 0.15040337345337373,
|
|
"calibration/ece": 0.19020302118642468,
|
|
"calibration/mean_confidence": 0.5479368743882117,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014322916666666652,
|
|
"completions/max_length": 3943.0,
|
|
"completions/max_terminated_length": 3943.0,
|
|
"completions/mean_length": 893.93671875,
|
|
"completions/mean_terminated_length": 906.9239624023437,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 275.0,
|
|
"epoch": 0.4199947500656242,
|
|
"grad_norm": 0.0003500058373901993,
|
|
"learning_rate": 9.93975903614458e-07,
|
|
"loss": -0.0124,
|
|
"num_tokens": 393874302.0,
|
|
"reward": 0.9988266825675964,
|
|
"reward_std": 0.139481620490551,
|
|
"rewards/accuracy_reward": 0.71171875,
|
|
"rewards/brier_reward": 0.7771403908729553,
|
|
"rewards/confidence_uniqueness_reward": 0.9368108153343201,
|
|
"rewards/format_reward": 0.9856770873069763,
|
|
"rewards/frontier_aurc_reward": -0.0016718719620257617,
|
|
"rewards/frontier_ece_reward": -0.0001888960599899292,
|
|
"rewards/frontier_entropy_batch_reward": -0.26143977642059324,
|
|
"rewards/volume_coverage_0": -1.1183640669792938e-09,
|
|
"rewards/volume_coverage_1": -1.1183640669792938e-09,
|
|
"rewards/volume_coverage_10": -2.567473664782938e-08,
|
|
"rewards/volume_coverage_15": -6.893287354614585e-06,
|
|
"rewards/volume_coverage_20": 0.0032489079516381026,
|
|
"rewards/volume_coverage_25": 0.04593147337436676,
|
|
"rewards/volume_coverage_5": -1.3001145759972133e-08,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1520887643098831,
|
|
"signal/accuracy_reward/group_std_mean": 0.20999579429626464,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07604438215494155,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07604438215494155,
|
|
"signal/advantage_abs_mean": 0.10078130513429642,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10078130513429642,
|
|
"signal/advantage_pre_scale_std": 0.167160502076149,
|
|
"signal/advantage_std": 0.167160502076149,
|
|
"signal/brier_reward/centered_abs_mean": 0.1584286332130432,
|
|
"signal/brier_reward/group_std_mean": 0.20207290053367616,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015842863731086253,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015842863731086253,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03476654589176178,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.056284508854150775,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034766546450555325,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034766546450555325,
|
|
"signal/format_reward/centered_abs_mean": 0.02369249127805233,
|
|
"signal/format_reward/group_std_mean": 0.04311860054731369,
|
|
"signal/format_reward/group_zero_std_frac": 0.825000011920929,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011846245639026165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.011846245639026165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022591098211705686,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003889821656048298,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8238874438102358e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8238874438102358e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03167073018848896,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.041566482931375506,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031670730095356703,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031670730095356703,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3187211215496063,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3893312394618988,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03187211267650127,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03187211267650127,
|
|
"signal/volume_coverage_0/centered_abs_mean": 6.795347695653931e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 8.810956009241978e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.8166666626930237,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.795348173049831e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 6.795348173049831e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 6.795347695653931e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 8.810956009241978e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.8166666626930237,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.795348173049831e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 6.795348173049831e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 3.022442399469583e-07,
|
|
"signal/volume_coverage_10/group_std_mean": 4.0371518821302744e-07,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.48611110746860503,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.022442296440886e-08,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.022442296440886e-08,
|
|
"signal/volume_coverage_15/centered_abs_mean": 6.898900683154352e-05,
|
|
"signal/volume_coverage_15/group_std_mean": 9.025059152918402e-05,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.22777778208255767,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 6.898901301610749e-06,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 6.898901301610749e-06,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.011228302493691444,
|
|
"signal/volume_coverage_20/group_std_mean": 0.014794117771089077,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0011228302493691445,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.0011228302493691445,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.04425051659345627,
|
|
"signal/volume_coverage_25/group_std_mean": 0.05683296546339989,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.004425051528960467,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.004425051528960467,
|
|
"signal/volume_coverage_5/centered_abs_mean": 5.0608609925006934e-08,
|
|
"signal/volume_coverage_5/group_std_mean": 6.722306373774245e-08,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.6777777791023254,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 5.0608611079638875e-09,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 5.0608611079638875e-09,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.09735049164862375,
|
|
"calibration/batch_distribution_entropy": 0.9718827549906518,
|
|
"calibration/buffer_distribution_entropy": 0.9891104431593053,
|
|
"calibration/confidence_entropy": 0.5013433986553448,
|
|
"calibration/coverage@0%": 0.0641919163548773,
|
|
"calibration/coverage@1%": 0.08208665319698255,
|
|
"calibration/coverage@10%": 0.6338338045297767,
|
|
"calibration/coverage@15%": 0.7994965859597166,
|
|
"calibration/coverage@20%": 0.8994158533623254,
|
|
"calibration/coverage@25%": 0.9608651226158038,
|
|
"calibration/coverage@30%": 0.9796875,
|
|
"calibration/coverage@5%": 0.3505137032120355,
|
|
"calibration/ece": 0.18134802593018456,
|
|
"calibration/mean_confidence": 0.5809839208489577,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.016666666666666673,
|
|
"completions/max_length": 3667.4,
|
|
"completions/max_terminated_length": 3667.4,
|
|
"completions/mean_length": 848.6334350585937,
|
|
"completions/mean_terminated_length": 862.9874877929688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 287.2,
|
|
"epoch": 0.4319946000674992,
|
|
"grad_norm": 0.0003335257642902434,
|
|
"learning_rate": 8.433734939759036e-07,
|
|
"loss": -0.0121,
|
|
"num_tokens": 406750527.0,
|
|
"reward": 0.9957963824272156,
|
|
"reward_std": 0.14223625361919404,
|
|
"rewards/accuracy_reward": 0.7073784708976746,
|
|
"rewards/brier_reward": 0.7794308066368103,
|
|
"rewards/confidence_uniqueness_reward": 0.9341415762901306,
|
|
"rewards/format_reward": 0.9833333373069764,
|
|
"rewards/frontier_aurc_reward": -0.0021487005054950715,
|
|
"rewards/frontier_ece_reward": 0.000431177020072937,
|
|
"rewards/frontier_entropy_batch_reward": -0.26810349225997926,
|
|
"rewards/volume_coverage_0": -2.0573607123486327e-10,
|
|
"rewards/volume_coverage_1": -2.0573607123486327e-10,
|
|
"rewards/volume_coverage_10": -1.6212315223640416e-08,
|
|
"rewards/volume_coverage_15": 9.642715667723678e-07,
|
|
"rewards/volume_coverage_20": 0.005197459273040295,
|
|
"rewards/volume_coverage_25": 0.05357494503259659,
|
|
"rewards/volume_coverage_5": -5.968378763432369e-10,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1573187917470932,
|
|
"signal/accuracy_reward/group_std_mean": 0.2063736468553543,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0786593958735466,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0786593958735466,
|
|
"signal/advantage_abs_mean": 0.10662449449300766,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10662449449300766,
|
|
"signal/advantage_pre_scale_std": 0.17244213521480561,
|
|
"signal/advantage_std": 0.17244213521480561,
|
|
"signal/brier_reward/centered_abs_mean": 0.15907377898693084,
|
|
"signal/brier_reward/group_std_mean": 0.2006416529417038,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015907378122210502,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015907378122210502,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03644292093813419,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05790592879056931,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036442920099943876,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036442920099943876,
|
|
"signal/format_reward/centered_abs_mean": 0.02569444477558136,
|
|
"signal/format_reward/group_std_mean": 0.04491157345473766,
|
|
"signal/format_reward/group_zero_std_frac": 0.825000011920929,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01284722238779068,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01284722238779068,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028547207824885846,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00510807204991579,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.568401371012442e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.568401371012442e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.031456112116575244,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0408004954457283,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0031456112395972014,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0031456112395972014,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32392174005508423,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39390974044799804,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03239217437803745,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03239217437803745,
|
|
"signal/volume_coverage_0/centered_abs_mean": 6.894463333573242e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 8.965259878923603e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.8277777791023254,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.894463999707056e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 6.894463999707056e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 6.894463333573242e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 8.965259878923603e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.8277777791023254,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.894463999707056e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 6.894463999707056e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 2.0830769500435054e-07,
|
|
"signal/volume_coverage_10/group_std_mean": 2.7283264500965745e-07,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.4138888895511627,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.0830770885993388e-08,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.0830770885993388e-08,
|
|
"signal/volume_coverage_15/centered_abs_mean": 9.59263401455246e-05,
|
|
"signal/volume_coverage_15/group_std_mean": 0.0001241161226062104,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.1916666731238365,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 9.592634523869491e-06,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 9.592634523869491e-06,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.013449459336698055,
|
|
"signal/volume_coverage_20/group_std_mean": 0.01758615728467703,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0013449459336698055,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.0013449459336698055,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.05379967465996742,
|
|
"signal/volume_coverage_25/group_std_mean": 0.06872403174638748,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.005379967298358679,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.005379967298358679,
|
|
"signal/volume_coverage_5/centered_abs_mean": 1.9899143155477362e-08,
|
|
"signal/volume_coverage_5/group_std_mean": 2.5720825524988555e-08,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.7027777791023254,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.989914355515765e-09,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.989914355515765e-09,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19188778559925812,
|
|
"calibration/batch_distribution_entropy": 0.9692985646044558,
|
|
"calibration/buffer_distribution_entropy": 0.9885363470800119,
|
|
"calibration/confidence_entropy": 0.49170521564602654,
|
|
"calibration/coverage@0%": 0.007841128807246222,
|
|
"calibration/coverage@1%": 0.007841128807246222,
|
|
"calibration/coverage@10%": 0.13206599825894333,
|
|
"calibration/coverage@15%": 0.3171304564834864,
|
|
"calibration/coverage@20%": 0.6903923725390136,
|
|
"calibration/coverage@25%": 0.8874851338974479,
|
|
"calibration/coverage@30%": 0.9246073298429319,
|
|
"calibration/coverage@5%": 0.05588290426416528,
|
|
"calibration/ece": 0.22218107648375957,
|
|
"calibration/mean_confidence": 0.5590917161464007,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013194444444444463,
|
|
"completions/max_length": 3478.2,
|
|
"completions/max_terminated_length": 3478.2,
|
|
"completions/mean_length": 835.9971435546875,
|
|
"completions/mean_terminated_length": 847.218603515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 253.8,
|
|
"epoch": 0.44399445006937416,
|
|
"grad_norm": 0.0003270205343142152,
|
|
"learning_rate": 6.927710843373495e-07,
|
|
"loss": -0.008,
|
|
"num_tokens": 419471262.0,
|
|
"reward": 0.9919918060302735,
|
|
"reward_std": 0.14110299646854402,
|
|
"rewards/accuracy_reward": 0.68828125,
|
|
"rewards/brier_reward": 0.776080322265625,
|
|
"rewards/confidence_uniqueness_reward": 0.9390787482261658,
|
|
"rewards/format_reward": 0.98671875,
|
|
"rewards/frontier_aurc_reward": -0.0019206261495128274,
|
|
"rewards/frontier_ece_reward": 0.0002056588651612401,
|
|
"rewards/frontier_entropy_batch_reward": -0.2390881210565567,
|
|
"rewards/volume_coverage_0": -9.607167283931516e-11,
|
|
"rewards/volume_coverage_1": -9.607167283931516e-11,
|
|
"rewards/volume_coverage_10": 6.878886154026986e-08,
|
|
"rewards/volume_coverage_15": 8.149280984071083e-06,
|
|
"rewards/volume_coverage_20": 0.008166126534342765,
|
|
"rewards/volume_coverage_25": 0.060707013309001925,
|
|
"rewards/volume_coverage_5": -2.027620027433841e-09,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16101888120174407,
|
|
"signal/accuracy_reward/group_std_mean": 0.21531691253185273,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.37500000596046446,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08050944060087203,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08050944060087203,
|
|
"signal/advantage_abs_mean": 0.10527712404727936,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10527712404727936,
|
|
"signal/advantage_pre_scale_std": 0.16531254947185517,
|
|
"signal/advantage_std": 0.16531254947185517,
|
|
"signal/brier_reward/centered_abs_mean": 0.1594757229089737,
|
|
"signal/brier_reward/group_std_mean": 0.202311235666275,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015947572141885757,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015947572141885757,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03095320761203766,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05040250569581985,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030953207984566688,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030953207984566688,
|
|
"signal/format_reward/centered_abs_mean": 0.020122612453997137,
|
|
"signal/format_reward/group_std_mean": 0.03736539520323277,
|
|
"signal/format_reward/group_zero_std_frac": 0.8444444537162781,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010061306226998568,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010061306226998568,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025723907630890606,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004795023193582893,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.215488541172817e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.215488541172817e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.032742565497756004,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04218977615237236,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032742566429078577,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032742566429078577,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3048552870750427,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37814642786979674,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030485530197620393,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030485530197620393,
|
|
"signal/volume_coverage_0/centered_abs_mean": 6.561989396303147e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 8.45127092752307e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.7694444537162781,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.561989479569874e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 6.561989479569874e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 6.561989396303147e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 8.45127092752307e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.7694444537162781,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.561989479569874e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 6.561989479569874e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 3.432544644965674e-07,
|
|
"signal/volume_coverage_10/group_std_mean": 4.456041097000707e-07,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.28333333432674407,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.432544453119135e-08,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.432544453119135e-08,
|
|
"signal/volume_coverage_15/centered_abs_mean": 0.0001473607844673097,
|
|
"signal/volume_coverage_15/group_std_mean": 0.00019077141769230366,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.2194444477558136,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 1.4736078446730971e-05,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 1.4736078446730971e-05,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.016519613564014435,
|
|
"signal/volume_coverage_20/group_std_mean": 0.021331942826509475,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0016519613796845078,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.0016519613796845078,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.05676387697458267,
|
|
"signal/volume_coverage_25/group_std_mean": 0.07344103008508682,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.005676387995481491,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.005676387995481491,
|
|
"signal/volume_coverage_5/centered_abs_mean": 1.644520555288409e-08,
|
|
"signal/volume_coverage_5/group_std_mean": 2.145805164666115e-08,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.6805555582046509,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 1.6445205613946356e-09,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 1.6445205613946356e-09,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16563327498860908,
|
|
"calibration/batch_distribution_entropy": 0.9669831065238427,
|
|
"calibration/buffer_distribution_entropy": 0.9883207647652302,
|
|
"calibration/confidence_entropy": 0.49194281575766324,
|
|
"calibration/coverage@0%": 0.023620188208345654,
|
|
"calibration/coverage@1%": 0.023620188208345654,
|
|
"calibration/coverage@10%": 0.3429785884142111,
|
|
"calibration/coverage@15%": 0.4899246732300961,
|
|
"calibration/coverage@20%": 0.6019432235082157,
|
|
"calibration/coverage@25%": 0.9215743919738493,
|
|
"calibration/coverage@30%": 0.9895259186351705,
|
|
"calibration/coverage@5%": 0.17279849807400702,
|
|
"calibration/ece": 0.20003581041724522,
|
|
"calibration/mean_confidence": 0.5874865962793766,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00894097222222221,
|
|
"completions/max_length": 3464.2,
|
|
"completions/max_terminated_length": 3464.2,
|
|
"completions/mean_length": 830.3328247070312,
|
|
"completions/mean_terminated_length": 837.8548095703125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 262.8,
|
|
"epoch": 0.45599430007124914,
|
|
"grad_norm": 0.00036124690086580813,
|
|
"learning_rate": 5.421686746987952e-07,
|
|
"loss": -0.0063,
|
|
"num_tokens": 432119640.0,
|
|
"reward": 1.01535884141922,
|
|
"reward_std": 0.13885502517223358,
|
|
"rewards/accuracy_reward": 0.7321180582046509,
|
|
"rewards/brier_reward": 0.7801229596138001,
|
|
"rewards/confidence_uniqueness_reward": 0.9418362855911255,
|
|
"rewards/format_reward": 0.9910590291023255,
|
|
"rewards/frontier_aurc_reward": -0.0021081025479361415,
|
|
"rewards/frontier_ece_reward": -0.0038828586577437816,
|
|
"rewards/frontier_entropy_batch_reward": -0.26736214458942414,
|
|
"rewards/volume_coverage_0": -1.141676597010699e-09,
|
|
"rewards/volume_coverage_1": -1.141676597010699e-09,
|
|
"rewards/volume_coverage_10": -1.2779696589859668e-08,
|
|
"rewards/volume_coverage_15": -9.471884404774756e-05,
|
|
"rewards/volume_coverage_20": 0.010956103447824717,
|
|
"rewards/volume_coverage_25": 0.07639060616493225,
|
|
"rewards/volume_coverage_5": -8.485374047850769e-09,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1593641459941864,
|
|
"signal/accuracy_reward/group_std_mean": 0.2131967216730118,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.37222222685813905,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0796820729970932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0796820729970932,
|
|
"signal/advantage_abs_mean": 0.10268108397722245,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10268108397722245,
|
|
"signal/advantage_pre_scale_std": 0.1636344462633133,
|
|
"signal/advantage_std": 0.1636344462633133,
|
|
"signal/brier_reward/centered_abs_mean": 0.15661969482898713,
|
|
"signal/brier_reward/group_std_mean": 0.19965132474899291,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015661969408392908,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015661969408392908,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02808639667928219,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04723411276936531,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028086398728191853,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028086398728191853,
|
|
"signal/format_reward/centered_abs_mean": 0.016227213107049464,
|
|
"signal/format_reward/group_std_mean": 0.033081219717860225,
|
|
"signal/format_reward/group_zero_std_frac": 0.8583333492279053,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008113606553524732,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008113606553524732,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00276075005531311,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004973709024488926,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4509377292124556e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4509377292124556e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.030482398346066475,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.039097010344266894,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003048239927738905,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003048239927738905,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3309023678302765,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40246840119361876,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03309023603796959,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03309023603796959,
|
|
"signal/volume_coverage_0/centered_abs_mean": 6.612283298679245e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 8.463888301335488e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.725000011920929,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.612283387497087e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 6.612283387497087e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 6.612283298679245e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 8.463888301335488e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.725000011920929,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.612283387497087e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 6.612283387497087e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 3.15068140821495e-07,
|
|
"signal/volume_coverage_10/group_std_mean": 3.9909486417855076e-07,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.3861111134290695,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.150681564534352e-08,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.150681564534352e-08,
|
|
"signal/volume_coverage_15/centered_abs_mean": 0.000732619600603357,
|
|
"signal/volume_coverage_15/group_std_mean": 0.0009918127965647728,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.03611111212521791,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 7.326196137000807e-05,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 7.326196137000807e-05,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.01917654536664486,
|
|
"signal/volume_coverage_20/group_std_mean": 0.024732422083616257,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.001917654532007873,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.001917654532007873,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.06703869476914406,
|
|
"signal/volume_coverage_25/group_std_mean": 0.08638201355934143,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.006703869812190533,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.006703869812190533,
|
|
"signal/volume_coverage_5/centered_abs_mean": 3.047114027765474e-08,
|
|
"signal/volume_coverage_5/group_std_mean": 3.937505539397535e-08,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.6055555582046509,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 3.047113938947632e-09,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 3.047113938947632e-09,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18204306394164244,
|
|
"calibration/batch_distribution_entropy": 0.9729749328000548,
|
|
"calibration/buffer_distribution_entropy": 0.9885560786006696,
|
|
"calibration/confidence_entropy": 0.49102789876075714,
|
|
"calibration/coverage@0%": 0.017981751165717673,
|
|
"calibration/coverage@1%": 0.017981751165717673,
|
|
"calibration/coverage@10%": 0.35273791173668567,
|
|
"calibration/coverage@15%": 0.45626031578387644,
|
|
"calibration/coverage@20%": 0.6275733840818873,
|
|
"calibration/coverage@25%": 0.8232062883198724,
|
|
"calibration/coverage@30%": 0.8748987198185059,
|
|
"calibration/coverage@5%": 0.07761235802587599,
|
|
"calibration/ece": 0.1691368534184712,
|
|
"calibration/mean_confidence": 0.5745053270312936,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01614583333333335,
|
|
"completions/max_length": 3499.8,
|
|
"completions/max_terminated_length": 3499.8,
|
|
"completions/mean_length": 829.2237915039062,
|
|
"completions/mean_terminated_length": 842.9637573242187,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 260.8,
|
|
"epoch": 0.46799415007312406,
|
|
"grad_norm": 0.0003712046018335968,
|
|
"learning_rate": 3.91566265060241e-07,
|
|
"loss": -0.0116,
|
|
"num_tokens": 444753162.0,
|
|
"reward": 0.9810139179229737,
|
|
"reward_std": 0.1448762148618698,
|
|
"rewards/accuracy_reward": 0.6711805582046508,
|
|
"rewards/brier_reward": 0.7673219919204712,
|
|
"rewards/confidence_uniqueness_reward": 0.9353937387466431,
|
|
"rewards/format_reward": 0.9837673664093017,
|
|
"rewards/frontier_aurc_reward": -0.0024833133444190024,
|
|
"rewards/frontier_ece_reward": -0.0005498200946021826,
|
|
"rewards/frontier_entropy_batch_reward": -0.25182714462280276,
|
|
"rewards/volume_coverage_0": -5.590650969944377e-10,
|
|
"rewards/volume_coverage_1": -5.590650969944377e-10,
|
|
"rewards/volume_coverage_10": -2.345436207473739e-08,
|
|
"rewards/volume_coverage_15": 5.4893085780349794e-05,
|
|
"rewards/volume_coverage_20": 0.013210531510412692,
|
|
"rewards/volume_coverage_25": 0.07210558652877808,
|
|
"rewards/volume_coverage_5": -4.368226069750847e-09,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15710720419883728,
|
|
"signal/accuracy_reward/group_std_mean": 0.2115005522966385,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3833333313465118,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07855360209941864,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07855360209941864,
|
|
"signal/advantage_abs_mean": 0.10682297945022583,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10682297945022583,
|
|
"signal/advantage_pre_scale_std": 0.17181708216667174,
|
|
"signal/advantage_std": 0.17181708216667174,
|
|
"signal/brier_reward/centered_abs_mean": 0.15967849493026734,
|
|
"signal/brier_reward/group_std_mean": 0.20298723876476288,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015967848524451256,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015967848524451256,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.034313973411917685,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0535574808716774,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003431397257372737,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003431397257372737,
|
|
"signal/format_reward/centered_abs_mean": 0.02379014752805233,
|
|
"signal/format_reward/group_std_mean": 0.040867094323039053,
|
|
"signal/format_reward/group_zero_std_frac": 0.8444444537162781,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011895073764026164,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.011895073764026164,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00311872442252934,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00557683790102601,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.8984056300250816e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.8984056300250816e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02924216091632843,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03790022060275078,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00292421611957252,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00292421611957252,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30970343947410583,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38179963231086733,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030970345064997674,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030970345064997674,
|
|
"signal/volume_coverage_0/centered_abs_mean": 7.757238851269221e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 1.0284649754055408e-08,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.6805555582046509,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.757239073313826e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 7.757239073313826e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 7.757238851269221e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 1.0284649754055408e-08,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.6805555582046509,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.757239073313826e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 7.757239073313826e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 3.1254682539838543e-07,
|
|
"signal/volume_coverage_10/group_std_mean": 4.123982932924264e-07,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.35277777910232544,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.125468204245863e-08,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.125468204245863e-08,
|
|
"signal/volume_coverage_15/centered_abs_mean": 0.0020562252262607216,
|
|
"signal/volume_coverage_15/group_std_mean": 0.002781915059313178,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.00020562254067044706,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 0.00020562254067044706,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.020905856043100357,
|
|
"signal/volume_coverage_20/group_std_mean": 0.02738172821700573,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.002090585697442293,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.002090585697442293,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.07110105603933334,
|
|
"signal/volume_coverage_25/group_std_mean": 0.09250357747077942,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.00711010554805398,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.00711010554805398,
|
|
"signal/volume_coverage_5/centered_abs_mean": 4.895319065667536e-08,
|
|
"signal/volume_coverage_5/group_std_mean": 6.477631941947948e-08,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.5333333373069763,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.895319172248947e-09,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 4.895319172248947e-09,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.17619245644327491,
|
|
"calibration/batch_distribution_entropy": 0.9541203111935174,
|
|
"calibration/buffer_distribution_entropy": 0.9881714641504811,
|
|
"calibration/confidence_entropy": 0.507531712440415,
|
|
"calibration/coverage@0%": 0.013553948344807684,
|
|
"calibration/coverage@1%": 0.013553948344807684,
|
|
"calibration/coverage@10%": 0.22703562373747785,
|
|
"calibration/coverage@15%": 0.37528082427196596,
|
|
"calibration/coverage@20%": 0.6159289333624963,
|
|
"calibration/coverage@25%": 0.9096808862433863,
|
|
"calibration/coverage@30%": 0.9412698412698413,
|
|
"calibration/coverage@5%": 0.15962724677412704,
|
|
"calibration/ece": 0.1774361783998174,
|
|
"calibration/mean_confidence": 0.6063874822733339,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.008420138888888862,
|
|
"completions/max_length": 3395.8,
|
|
"completions/max_terminated_length": 3395.8,
|
|
"completions/mean_length": 813.6625854492188,
|
|
"completions/mean_terminated_length": 820.6150024414062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 268.2,
|
|
"epoch": 0.47999400007499904,
|
|
"grad_norm": 0.00035624156589619815,
|
|
"learning_rate": 2.409638554216868e-07,
|
|
"loss": -0.0055,
|
|
"num_tokens": 457194363.0,
|
|
"reward": 1.0029555439949036,
|
|
"reward_std": 0.13360524475574492,
|
|
"rewards/accuracy_reward": 0.7014756917953491,
|
|
"rewards/brier_reward": 0.7836914658546448,
|
|
"rewards/confidence_uniqueness_reward": 0.9424945712089539,
|
|
"rewards/format_reward": 0.9915798544883728,
|
|
"rewards/frontier_aurc_reward": -0.0024135842453688384,
|
|
"rewards/frontier_ece_reward": -0.001841819501714781,
|
|
"rewards/frontier_entropy_batch_reward": -0.2604458272457123,
|
|
"rewards/volume_coverage_0": -2.4506557894099502e-11,
|
|
"rewards/volume_coverage_1": -2.4506557894099502e-11,
|
|
"rewards/volume_coverage_10": 1.3166838286338133e-09,
|
|
"rewards/volume_coverage_15": 0.00029627673065988345,
|
|
"rewards/volume_coverage_20": 0.016802585497498513,
|
|
"rewards/volume_coverage_25": 0.0835825502872467,
|
|
"rewards/volume_coverage_5": 1.605325672393576e-09,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14759657382965088,
|
|
"signal/accuracy_reward/group_std_mean": 0.19293810725212096,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4555555582046509,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07379828691482544,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07379828691482544,
|
|
"signal/advantage_abs_mean": 0.10026623159646988,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10026623159646988,
|
|
"signal/advantage_pre_scale_std": 0.15953322649002075,
|
|
"signal/advantage_std": 0.15953322649002075,
|
|
"signal/brier_reward/centered_abs_mean": 0.15002098083496093,
|
|
"signal/brier_reward/group_std_mean": 0.1909335136413574,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015002098679542542,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015002098679542542,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025808610394597052,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04277213215827942,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002580861235037446,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002580861235037446,
|
|
"signal/format_reward/centered_abs_mean": 0.014534505270421504,
|
|
"signal/format_reward/group_std_mean": 0.029166242480278014,
|
|
"signal/format_reward/group_zero_std_frac": 0.875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007267252635210752,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.007267252635210752,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030365238897502424,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005425933655351401,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.795655138674192e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.795655138674192e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02757507599890232,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03539729043841362,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002757507748901844,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002757507748901844,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31177434921264646,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3830597996711731,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03117743618786335,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03117743618786335,
|
|
"signal/volume_coverage_0/centered_abs_mean": 5.777263822892564e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 7.668372603575335e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.7388888955116272,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 5.777263667461341e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 5.777263667461341e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 5.777263822892564e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 7.668372603575335e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.7388888955116272,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 5.777263667461341e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 5.777263667461341e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 2.574304829749963e-07,
|
|
"signal/volume_coverage_10/group_std_mean": 3.39649250236107e-07,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.31944445371627805,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 2.5743048936988088e-08,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 2.5743048936988088e-08,
|
|
"signal/volume_coverage_15/centered_abs_mean": 0.0032336109317839144,
|
|
"signal/volume_coverage_15/group_std_mean": 0.004335348587483167,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.00032336109434254466,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 0.00032336109434254466,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.022926723957061766,
|
|
"signal/volume_coverage_20/group_std_mean": 0.0293125681579113,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.00229267249815166,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.00229267249815166,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.07852394431829453,
|
|
"signal/volume_coverage_25/group_std_mean": 0.10077835321426391,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.007852394692599773,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.007852394692599773,
|
|
"signal/volume_coverage_5/centered_abs_mean": 5.1238904319461655e-08,
|
|
"signal/volume_coverage_5/group_std_mean": 6.800436693765733e-08,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.4333333373069763,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 5.123890467473302e-09,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 5.123890467473302e-09,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.47999400007499904,
|
|
"eval_calibration/aurc": 0.1729246337459238,
|
|
"eval_calibration/batch_distribution_entropy": 0.908734295978599,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9873650506140915,
|
|
"eval_calibration/confidence_entropy": 0.4987768570492685,
|
|
"eval_calibration/coverage@0%": 0.15104166666666666,
|
|
"eval_calibration/coverage@1%": 0.15104166666666666,
|
|
"eval_calibration/coverage@10%": 0.375,
|
|
"eval_calibration/coverage@15%": 0.5677083333333334,
|
|
"eval_calibration/coverage@20%": 0.7916666666666666,
|
|
"eval_calibration/coverage@25%": 0.9322916666666666,
|
|
"eval_calibration/coverage@30%": 0.96875,
|
|
"eval_calibration/coverage@5%": 0.21875,
|
|
"eval_calibration/ece": 0.25312968750000003,
|
|
"eval_calibration/mean_confidence": 0.5731484375,
|
|
"eval_completions/clipped_ratio": 0.008680555555555561,
|
|
"eval_completions/max_length": 2195.6666666666665,
|
|
"eval_completions/max_terminated_length": 2195.6666666666665,
|
|
"eval_completions/mean_length": 804.9442952473959,
|
|
"eval_completions/mean_terminated_length": 812.1565144856771,
|
|
"eval_completions/min_length": 130.66666666666666,
|
|
"eval_completions/min_terminated_length": 333.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 457194363.0,
|
|
"eval_reward": 0.9083906412124634,
|
|
"eval_reward_std": 0.260540634393692,
|
|
"eval_rewards/accuracy_reward": 0.6718749900658926,
|
|
"eval_rewards/brier_reward": 0.7771714429060618,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8881837427616119,
|
|
"eval_rewards/format_reward": 0.9904513955116272,
|
|
"eval_rewards/frontier_aurc_reward": -0.002305791092415651,
|
|
"eval_rewards/frontier_ece_reward": -0.00047329728355786454,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9904513955116272,
|
|
"eval_rewards/volume_coverage_0": 2.7870893556010873e-10,
|
|
"eval_rewards/volume_coverage_1": 2.7870893556010873e-10,
|
|
"eval_rewards/volume_coverage_10": 2.718800632776445e-08,
|
|
"eval_rewards/volume_coverage_15": 0.0007767819847686042,
|
|
"eval_rewards/volume_coverage_20": 0.017239811054120462,
|
|
"eval_rewards/volume_coverage_25": 0.08011540894707044,
|
|
"eval_rewards/volume_coverage_5": 2.873590402667029e-09,
|
|
"eval_runtime": 186.5897,
|
|
"eval_samples_per_second": 5.359,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4265408019224803,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.46809791525204975,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21327040096124014,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21327040096124014,
|
|
"eval_signal/advantage_abs_mean": 0.23001150538523993,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.23001150538523993,
|
|
"eval_signal/advantage_pre_scale_std": 0.2585917264223099,
|
|
"eval_signal/advantage_std": 0.2585917264223099,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.1993307818969091,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2538089131315549,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019933079058925312,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019933079058925312,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.051736210783322654,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0815204003204902,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0051736211171373725,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0051736211171373725,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.018391927083333332,
|
|
"eval_signal/format_reward/group_std_mean": 0.051025692373514175,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.722222238779068,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.009195963541666666,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.009195963541666666,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003810435184277594,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007476490068559845,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.763044307765085e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.763044307765085e-05,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.036483422542611756,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.050309122850497566,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036483421766509614,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036483421766509614,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.018391927083333332,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.051025692373514175,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.722222238779068,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0018391927975850801,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0018391927975850801,
|
|
"eval_signal/volume_coverage_0/centered_abs_mean": 7.0012612004196244e-09,
|
|
"eval_signal/volume_coverage_0/group_std_mean": 9.86357751031619e-09,
|
|
"eval_signal/volume_coverage_0/group_zero_std_frac": 0.6666666865348816,
|
|
"eval_signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 7.00126142246423e-10,
|
|
"eval_signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_0/weighted_centered_abs_mean": 7.00126142246423e-10,
|
|
"eval_signal/volume_coverage_1/centered_abs_mean": 7.0012612004196244e-09,
|
|
"eval_signal/volume_coverage_1/group_std_mean": 9.86357751031619e-09,
|
|
"eval_signal/volume_coverage_1/group_zero_std_frac": 0.6666666865348816,
|
|
"eval_signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 7.00126142246423e-10,
|
|
"eval_signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_1/weighted_centered_abs_mean": 7.00126142246423e-10,
|
|
"eval_signal/volume_coverage_10/centered_abs_mean": 4.954252356507519e-07,
|
|
"eval_signal/volume_coverage_10/group_std_mean": 6.970840994805864e-07,
|
|
"eval_signal/volume_coverage_10/group_zero_std_frac": 0.2777777860562007,
|
|
"eval_signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 4.954252534143203e-08,
|
|
"eval_signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_10/weighted_centered_abs_mean": 4.954252534143203e-08,
|
|
"eval_signal/volume_coverage_15/centered_abs_mean": 0.005268752574920654,
|
|
"eval_signal/volume_coverage_15/group_std_mean": 0.00819743393609921,
|
|
"eval_signal/volume_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.0005268752429401502,
|
|
"eval_signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_15/weighted_centered_abs_mean": 0.0005268752429401502,
|
|
"eval_signal/volume_coverage_20/centered_abs_mean": 0.031609114880363144,
|
|
"eval_signal/volume_coverage_20/group_std_mean": 0.04171175882220268,
|
|
"eval_signal/volume_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.003160911495797336,
|
|
"eval_signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_20/weighted_centered_abs_mean": 0.003160911495797336,
|
|
"eval_signal/volume_coverage_25/centered_abs_mean": 0.12539048989613852,
|
|
"eval_signal/volume_coverage_25/group_std_mean": 0.1601824959119161,
|
|
"eval_signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.012539049455275139,
|
|
"eval_signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_25/weighted_centered_abs_mean": 0.012539049455275139,
|
|
"eval_signal/volume_coverage_5/centered_abs_mean": 7.264358714564878e-08,
|
|
"eval_signal/volume_coverage_5/group_std_mean": 1.0225969523010765e-07,
|
|
"eval_signal/volume_coverage_5/group_zero_std_frac": 0.3333333432674408,
|
|
"eval_signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.26435896621543e-09,
|
|
"eval_signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"eval_signal/volume_coverage_5/weighted_centered_abs_mean": 7.26435896621543e-09,
|
|
"eval_steps_per_second": 0.032,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16806944583325462,
|
|
"calibration/batch_distribution_entropy": 0.9493498137992444,
|
|
"calibration/buffer_distribution_entropy": 0.9872563989166057,
|
|
"calibration/confidence_entropy": 0.4816480546128011,
|
|
"calibration/coverage@0%": 0.020008968258164346,
|
|
"calibration/coverage@1%": 0.020008968258164346,
|
|
"calibration/coverage@10%": 0.20190446255225694,
|
|
"calibration/coverage@15%": 0.5117778930717993,
|
|
"calibration/coverage@20%": 0.8431347047800399,
|
|
"calibration/coverage@25%": 0.9027516403741449,
|
|
"calibration/coverage@30%": 0.9512983386849087,
|
|
"calibration/coverage@5%": 0.023683508940579044,
|
|
"calibration/ece": 0.1584036925844378,
|
|
"calibration/mean_confidence": 0.6165435627133257,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011979166666666674,
|
|
"completions/max_length": 3255.8,
|
|
"completions/max_terminated_length": 3255.8,
|
|
"completions/mean_length": 825.996875,
|
|
"completions/mean_terminated_length": 836.0012451171875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 253.2,
|
|
"epoch": 0.491993850076874,
|
|
"grad_norm": 0.0002999586286023259,
|
|
"learning_rate": 9.036144578313253e-08,
|
|
"loss": -0.0083,
|
|
"num_tokens": 469775799.0,
|
|
"reward": 1.0257537722587586,
|
|
"reward_std": 0.136698442697525,
|
|
"rewards/accuracy_reward": 0.7496527791023254,
|
|
"rewards/brier_reward": 0.7866234183311462,
|
|
"rewards/confidence_uniqueness_reward": 0.9380228757858277,
|
|
"rewards/format_reward": 0.9880208253860474,
|
|
"rewards/frontier_aurc_reward": -0.0017146203899756074,
|
|
"rewards/frontier_ece_reward": -0.005589825892820954,
|
|
"rewards/frontier_entropy_batch_reward": -0.2817916065454483,
|
|
"rewards/volume_coverage_0": -1.4832471251224888e-09,
|
|
"rewards/volume_coverage_1": -1.4832471251224888e-09,
|
|
"rewards/volume_coverage_10": -1.1369402130867456e-07,
|
|
"rewards/volume_coverage_15": 0.00047319423174485564,
|
|
"rewards/volume_coverage_20": 0.0234893973916769,
|
|
"rewards/volume_coverage_25": 0.10815636962652206,
|
|
"rewards/volume_coverage_5": -1.2138094973579427e-08,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14657118022441865,
|
|
"signal/accuracy_reward/group_std_mean": 0.2037561982870102,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38611111640930174,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07328559011220932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07328559011220932,
|
|
"signal/advantage_abs_mean": 0.09813274145126342,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09813274145126342,
|
|
"signal/advantage_pre_scale_std": 0.1621260464191437,
|
|
"signal/advantage_std": 0.1621260464191437,
|
|
"signal/brier_reward/centered_abs_mean": 0.14790882170200348,
|
|
"signal/brier_reward/group_std_mean": 0.18837517201900483,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014790883474051952,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014790883474051952,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030224530026316643,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04915469288825989,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003022453049197793,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003022453049197793,
|
|
"signal/format_reward/centered_abs_mean": 0.01829427070915699,
|
|
"signal/format_reward/group_std_mean": 0.034758422523736954,
|
|
"signal/format_reward/group_zero_std_frac": 0.8555555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009147135354578496,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009147135354578496,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022363578900694847,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004022621084004641,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.79544747172622e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.79544747172622e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02730635106563568,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03513160794973373,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027306349482387306,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027306349482387306,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33183044791221616,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4021769523620605,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03318304568529129,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03318304568529129,
|
|
"signal/volume_coverage_0/centered_abs_mean": 1.1628204266145303e-08,
|
|
"signal/volume_coverage_0/group_std_mean": 1.5069850789473095e-08,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.7361111044883728,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 1.1628204399372067e-09,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 1.1628204399372067e-09,
|
|
"signal/volume_coverage_1/centered_abs_mean": 1.1628204266145303e-08,
|
|
"signal/volume_coverage_1/group_std_mean": 1.5069850789473095e-08,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.7361111044883728,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 1.1628204399372067e-09,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 1.1628204399372067e-09,
|
|
"signal/volume_coverage_10/centered_abs_mean": 3.7749450711999086e-07,
|
|
"signal/volume_coverage_10/group_std_mean": 5.002540035548009e-07,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.3416666805744171,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.7749452275193105e-08,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.7749452275193105e-08,
|
|
"signal/volume_coverage_15/centered_abs_mean": 0.004773548245429993,
|
|
"signal/volume_coverage_15/group_std_mean": 0.006471954379230737,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.0004773548396769911,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 0.0004773548396769911,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.0273191150277853,
|
|
"signal/volume_coverage_20/group_std_mean": 0.03483609855175018,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0027319115120917558,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.0027319115120917558,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.08157561272382736,
|
|
"signal/volume_coverage_25/group_std_mean": 0.10533483922481537,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.008157561719417571,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.008157561719417571,
|
|
"signal/volume_coverage_5/centered_abs_mean": 7.86729152935095e-08,
|
|
"signal/volume_coverage_5/group_std_mean": 1.0281165501169198e-07,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.4361111164093018,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 7.867291351715266e-09,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 7.867291351715266e-09,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.10158368325560434,
|
|
"calibration/batch_distribution_entropy": 0.9546674144846191,
|
|
"calibration/buffer_distribution_entropy": 0.9870036950060173,
|
|
"calibration/confidence_entropy": 0.49870306686443894,
|
|
"calibration/coverage@0%": 0.0479565659189188,
|
|
"calibration/coverage@1%": 0.0479565659189188,
|
|
"calibration/coverage@10%": 0.5728609899799051,
|
|
"calibration/coverage@15%": 0.7681936033609923,
|
|
"calibration/coverage@20%": 0.886773839304836,
|
|
"calibration/coverage@25%": 0.9677753019926273,
|
|
"calibration/coverage@30%": 1.0,
|
|
"calibration/coverage@5%": 0.3374138221717951,
|
|
"calibration/ece": 0.14664376764987133,
|
|
"calibration/mean_confidence": 0.6115497295637312,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.008969907407407404,
|
|
"completions/max_length": 3451.0,
|
|
"completions/max_terminated_length": 3451.0,
|
|
"completions/mean_length": 820.9849446614584,
|
|
"completions/mean_terminated_length": 828.4159952799479,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 248.33333333333334,
|
|
"epoch": 0.49919376007799904,
|
|
"num_tokens": 477316495.0,
|
|
"reward": 1.004579246044159,
|
|
"reward_std": 0.13553029050429663,
|
|
"rewards/accuracy_reward": 0.6993634303410848,
|
|
"rewards/brier_reward": 0.7872058550516764,
|
|
"rewards/confidence_uniqueness_reward": 0.941747784614563,
|
|
"rewards/format_reward": 0.9910300970077515,
|
|
"rewards/frontier_aurc_reward": -0.0019426853007947404,
|
|
"rewards/frontier_ece_reward": -0.0012536543266226847,
|
|
"rewards/frontier_entropy_batch_reward": -0.26403993864854175,
|
|
"rewards/volume_coverage_0": -6.887284076384503e-10,
|
|
"rewards/volume_coverage_1": -6.887284076384503e-10,
|
|
"rewards/volume_coverage_10": -5.683458681460252e-08,
|
|
"rewards/volume_coverage_15": 0.0013588267417314153,
|
|
"rewards/volume_coverage_20": 0.025538019835948944,
|
|
"rewards/volume_coverage_25": 0.10351060579220454,
|
|
"rewards/volume_coverage_5": -7.1533599784364315e-09,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1471534992257754,
|
|
"signal/accuracy_reward/group_std_mean": 0.19441807766755423,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.44907407959302265,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0735767496128877,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0735767496128877,
|
|
"signal/advantage_abs_mean": 0.09917695571978886,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09917695571978886,
|
|
"signal/advantage_pre_scale_std": 0.16188943882783255,
|
|
"signal/advantage_std": 0.16188943882783255,
|
|
"signal/brier_reward/centered_abs_mean": 0.14869935313860574,
|
|
"signal/brier_reward/group_std_mean": 0.18970499436060587,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014869935810565948,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014869935810565948,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02755103384455045,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04898699869712194,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027551034775873027,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027551034775873027,
|
|
"signal/format_reward/centered_abs_mean": 0.016511140080789726,
|
|
"signal/format_reward/group_std_mean": 0.03586030130585035,
|
|
"signal/format_reward/group_zero_std_frac": 0.8379629651705424,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008255570040394863,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.008255570040394863,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025833341448257365,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004829682254542907,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.229167790171535e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.229167790171535e-05,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.027258147795995075,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03482848281661669,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002725814857209722,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002725814857209722,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3093116283416748,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3818712929884593,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030931161095698673,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030931161095698673,
|
|
"signal/volume_coverage_0/centered_abs_mean": 6.647302116154681e-09,
|
|
"signal/volume_coverage_0/group_std_mean": 8.50487754296599e-09,
|
|
"signal/volume_coverage_0/group_zero_std_frac": 0.7314814925193787,
|
|
"signal/volume_coverage_0/scaled_weighted_centered_abs_mean": 6.647302153162116e-10,
|
|
"signal/volume_coverage_0/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_0/weighted_centered_abs_mean": 6.647302153162116e-10,
|
|
"signal/volume_coverage_1/centered_abs_mean": 6.647302116154681e-09,
|
|
"signal/volume_coverage_1/group_std_mean": 8.50487754296599e-09,
|
|
"signal/volume_coverage_1/group_zero_std_frac": 0.7314814925193787,
|
|
"signal/volume_coverage_1/scaled_weighted_centered_abs_mean": 6.647302153162116e-10,
|
|
"signal/volume_coverage_1/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_1/weighted_centered_abs_mean": 6.647302153162116e-10,
|
|
"signal/volume_coverage_10/centered_abs_mean": 3.1564475951502874e-07,
|
|
"signal/volume_coverage_10/group_std_mean": 4.0169316169643327e-07,
|
|
"signal/volume_coverage_10/group_zero_std_frac": 0.34259260694185895,
|
|
"signal/volume_coverage_10/scaled_weighted_centered_abs_mean": 3.156447606992666e-08,
|
|
"signal/volume_coverage_10/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_10/weighted_centered_abs_mean": 3.156447606992666e-08,
|
|
"signal/volume_coverage_15/centered_abs_mean": 0.006117678868273894,
|
|
"signal/volume_coverage_15/group_std_mean": 0.007960582462449869,
|
|
"signal/volume_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_15/scaled_weighted_centered_abs_mean": 0.0006117678907079002,
|
|
"signal/volume_coverage_15/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_15/weighted_centered_abs_mean": 0.0006117678907079002,
|
|
"signal/volume_coverage_20/centered_abs_mean": 0.02946065676709016,
|
|
"signal/volume_coverage_20/group_std_mean": 0.03762132550279299,
|
|
"signal/volume_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_20/scaled_weighted_centered_abs_mean": 0.0029460658940176168,
|
|
"signal/volume_coverage_20/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_20/weighted_centered_abs_mean": 0.0029460658940176168,
|
|
"signal/volume_coverage_25/centered_abs_mean": 0.08665728569030762,
|
|
"signal/volume_coverage_25/group_std_mean": 0.11248831450939178,
|
|
"signal/volume_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/volume_coverage_25/scaled_weighted_centered_abs_mean": 0.008665728693207106,
|
|
"signal/volume_coverage_25/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_25/weighted_centered_abs_mean": 0.008665728693207106,
|
|
"signal/volume_coverage_5/centered_abs_mean": 4.9763441959764045e-08,
|
|
"signal/volume_coverage_5/group_std_mean": 6.365335754784003e-08,
|
|
"signal/volume_coverage_5/group_zero_std_frac": 0.41203704476356506,
|
|
"signal/volume_coverage_5/scaled_weighted_centered_abs_mean": 4.976344373612089e-09,
|
|
"signal/volume_coverage_5/weight": 0.10000000149011612,
|
|
"signal/volume_coverage_5/weighted_centered_abs_mean": 4.976344373612089e-09,
|
|
"step": 208,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.008794238732662052,
|
|
"train_runtime": 40876.3537,
|
|
"train_samples_per_second": 0.367,
|
|
"train_steps_per_second": 0.005
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 208,
|
|
"num_input_tokens_seen": 477316495,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 6,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|