Model: hector-gr/RLCR-v4-ks-uniqueness-cov0-entropy100-ece10-cold-math Source: Original Platform
7368 lines
475 KiB
JSON
7368 lines
475 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.49919376007799904,
|
|
"eval_steps": 50,
|
|
"global_step": 208,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.4906669495331548,
|
|
"calibration/batch_distribution_entropy": 0.2772102231626003,
|
|
"calibration/batch_entropy_100bins": 0.3506612698092768,
|
|
"calibration/batch_entropy_10bins": 0.2772102231626003,
|
|
"calibration/batch_entropy_50bins": 0.40836954215900895,
|
|
"calibration/batch_uniqueness": 0.5106568545813284,
|
|
"calibration/confidence_entropy": 0.22095930903324054,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.45256316983360323,
|
|
"calibration/mean_confidence": 0.9168870637225405,
|
|
"calibration/prompt_uniqueness": 0.3720936191585115,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.020138888888888908,
|
|
"completions/max_length": 4041.4,
|
|
"completions/max_terminated_length": 4041.4,
|
|
"completions/mean_length": 522.3839477539062,
|
|
"completions/mean_terminated_length": 533.1372802734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.011999850001874977,
|
|
"grad_norm": 0.0036375881172716618,
|
|
"learning_rate": 5.952380952380953e-07,
|
|
"loss": 0.0046,
|
|
"num_tokens": 9132071.0,
|
|
"reward": 0.4900794804096222,
|
|
"reward_std": 0.45279757380485536,
|
|
"rewards/accuracy_reward": 0.262673607468605,
|
|
"rewards/brier_reward": 0.31377317309379577,
|
|
"rewards/confidence_uniqueness_reward": 0.29045385122299194,
|
|
"rewards/format_reward": 0.6011284708976745,
|
|
"rewards/frontier_aurc_reward": 0.27633480429649354,
|
|
"rewards/frontier_coverage_0": 0.27633480429649354,
|
|
"rewards/frontier_coverage_1": 0.27633480429649354,
|
|
"rewards/frontier_coverage_10": 0.27633480429649354,
|
|
"rewards/frontier_coverage_15": 0.27633480429649354,
|
|
"rewards/frontier_coverage_20": 0.27633480429649354,
|
|
"rewards/frontier_coverage_25": 0.27633480429649354,
|
|
"rewards/frontier_coverage_5": 0.27633480429649354,
|
|
"rewards/frontier_ece_reward": 0.27633480429649354,
|
|
"rewards/frontier_entropy_batch_reward": -0.575112247467041,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.3098524272441864,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.23923611111111112,
|
|
"signal/accuracy_reward/group_std_mean": 0.3695395112037659,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.08611111268401146,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1549262136220932,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1549262136220932,
|
|
"signal/advantage_abs_mean": 0.38927987217903137,
|
|
"signal/advantage_pre_scale_abs_mean": 0.38927987217903137,
|
|
"signal/advantage_pre_scale_std": 0.4591569066047668,
|
|
"signal/advantage_std": 0.4591569066047668,
|
|
"signal/brier_reward/centered_abs_mean": 0.3203325092792511,
|
|
"signal/brier_reward/group_bin_occupancy": 0.5138888888888888,
|
|
"signal/brier_reward/group_std_mean": 0.3733829379081726,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03203325048089027,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03203325048089027,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2360519289970398,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6114583333333333,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.28973097801208497,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023605193197727203,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023605193197727203,
|
|
"signal/format_reward/centered_abs_mean": 0.4414442241191864,
|
|
"signal/format_reward/group_bin_occupancy": 0.25,
|
|
"signal/format_reward/group_std_mean": 0.4756063938140869,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2207221120595932,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.2207221120595932,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.31023464202880857,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.39861111111111114,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3682305455207825,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.31023464202880857,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.39861111111111114,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3682305455207825,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.31023464202880857,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.39861111111111114,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3682305455207825,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.31023464202880857,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.39861111111111114,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3682305455207825,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.31023464202880857,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.39861111111111114,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3682305455207825,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.31023464202880857,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.39861111111111114,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3682305455207825,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.31023464202880857,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.39861111111111114,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3682305455207825,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.31023464202880857,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.39861111111111114,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3682305455207825,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003877933043986559,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.31023464202880857,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.39861111111111114,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3682305455207825,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03102346435189247,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03102346435189247,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.449966561794281,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.30486111111111114,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4825741767883301,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04499665722250938,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04499665722250938,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5273605183169956,
|
|
"calibration/batch_distribution_entropy": 0.2604161993663435,
|
|
"calibration/batch_entropy_100bins": 0.34585173532919666,
|
|
"calibration/batch_entropy_10bins": 0.2604161993663435,
|
|
"calibration/batch_entropy_50bins": 0.40165626020405876,
|
|
"calibration/batch_uniqueness": 0.502415543865784,
|
|
"calibration/confidence_entropy": 0.22185867612429216,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.054814814814814816,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4804000280177963,
|
|
"calibration/mean_confidence": 0.9198524716449924,
|
|
"calibration/prompt_uniqueness": 0.3770142813382146,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.018750000000000024,
|
|
"completions/max_length": 3941.4,
|
|
"completions/max_terminated_length": 3941.4,
|
|
"completions/mean_length": 474.6142333984375,
|
|
"completions/mean_terminated_length": 483.915966796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 3.8,
|
|
"epoch": 0.023999700003749954,
|
|
"grad_norm": 0.0029272218234837055,
|
|
"learning_rate": 1.1904761904761906e-06,
|
|
"loss": 0.0019,
|
|
"num_tokens": 17682347.0,
|
|
"reward": 0.5634824514389039,
|
|
"reward_std": 0.42838944792747496,
|
|
"rewards/accuracy_reward": 0.289496523141861,
|
|
"rewards/brier_reward": 0.35398504734039304,
|
|
"rewards/confidence_uniqueness_reward": 0.35191494822502134,
|
|
"rewards/format_reward": 0.7096354126930237,
|
|
"rewards/frontier_aurc_reward": 0.3055602788925171,
|
|
"rewards/frontier_coverage_0": 0.3055602788925171,
|
|
"rewards/frontier_coverage_1": 0.3055602788925171,
|
|
"rewards/frontier_coverage_10": 0.3055602788925171,
|
|
"rewards/frontier_coverage_15": 0.3055602788925171,
|
|
"rewards/frontier_coverage_20": 0.3055602788925171,
|
|
"rewards/frontier_coverage_25": 0.3055602788925171,
|
|
"rewards/frontier_coverage_5": 0.3055602788925171,
|
|
"rewards/frontier_ece_reward": 0.3055602788925171,
|
|
"rewards/frontier_entropy_batch_reward": -0.6778560400009155,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.32107747793197633,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.2420138888888889,
|
|
"signal/accuracy_reward/group_std_mean": 0.38136300444602966,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.06388889066874981,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.16053873896598816,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.16053873896598816,
|
|
"signal/advantage_abs_mean": 0.3579964995384216,
|
|
"signal/advantage_pre_scale_abs_mean": 0.3579964995384216,
|
|
"signal/advantage_pre_scale_std": 0.4340688169002533,
|
|
"signal/advantage_std": 0.4340688169002533,
|
|
"signal/brier_reward/centered_abs_mean": 0.3165506422519684,
|
|
"signal/brier_reward/group_bin_occupancy": 0.5385416666666667,
|
|
"signal/brier_reward/group_std_mean": 0.3699568331241608,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03165506534278393,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03165506534278393,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.22355839908123015,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.611111111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.27911095023155214,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022355839610099792,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022355839610099792,
|
|
"signal/format_reward/centered_abs_mean": 0.36045463681221007,
|
|
"signal/format_reward/group_bin_occupancy": 0.24895833333333334,
|
|
"signal/format_reward/group_std_mean": 0.42311119437217715,
|
|
"signal/format_reward/group_zero_std_frac": 0.00833333358168602,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.18022731840610504,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.18022731840610504,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.31446189880371095,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.4184027777777778,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3722220480442047,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.31446189880371095,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.4184027777777778,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3722220480442047,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.31446189880371095,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.4184027777777778,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3722220480442047,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.31446189880371095,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.4184027777777778,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3722220480442047,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.31446189880371095,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.4184027777777778,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3722220480442047,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.31446189880371095,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.4184027777777778,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3722220480442047,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.31446189880371095,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.4184027777777778,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3722220480442047,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.31446189880371095,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.4184027777777778,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3722220480442047,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0039307738188654184,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.31446189880371095,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.4184027777777778,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3722220480442047,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03144619055092335,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03144619055092335,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.38704427480697634,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.30694444444444446,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.444519454240799,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03870442658662796,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03870442658662796,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5167585765758467,
|
|
"calibration/batch_distribution_entropy": 0.302369935218339,
|
|
"calibration/batch_entropy_100bins": 0.36347179220282333,
|
|
"calibration/batch_entropy_10bins": 0.302369935218339,
|
|
"calibration/batch_entropy_50bins": 0.422450421929475,
|
|
"calibration/batch_uniqueness": 0.5326884584432358,
|
|
"calibration/confidence_entropy": 0.2414202213951906,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.07401129943502824,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.49241807139440097,
|
|
"calibration/mean_confidence": 0.9089811337755254,
|
|
"calibration/prompt_uniqueness": 0.43205592433820234,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.009461805555555536,
|
|
"completions/max_length": 3884.0,
|
|
"completions/max_terminated_length": 3884.0,
|
|
"completions/mean_length": 432.0356872558594,
|
|
"completions/mean_terminated_length": 436.19459228515626,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 47.2,
|
|
"epoch": 0.03599955000562493,
|
|
"grad_norm": 0.0013383845798671246,
|
|
"learning_rate": 1.7857142857142859e-06,
|
|
"loss": -0.0098,
|
|
"num_tokens": 25761382.0,
|
|
"reward": 0.7086469292640686,
|
|
"reward_std": 0.34355844259262086,
|
|
"rewards/accuracy_reward": 0.33203125,
|
|
"rewards/brier_reward": 0.43671444058418274,
|
|
"rewards/confidence_uniqueness_reward": 0.5003645718097687,
|
|
"rewards/format_reward": 0.9299479365348816,
|
|
"rewards/frontier_aurc_reward": 0.36155037879943847,
|
|
"rewards/frontier_coverage_0": 0.36155037879943847,
|
|
"rewards/frontier_coverage_1": 0.36155037879943847,
|
|
"rewards/frontier_coverage_10": 0.36155037879943847,
|
|
"rewards/frontier_coverage_15": 0.36155037879943847,
|
|
"rewards/frontier_coverage_20": 0.36155037879943847,
|
|
"rewards/frontier_coverage_25": 0.36155037879943847,
|
|
"rewards/frontier_coverage_5": 0.36155037879943847,
|
|
"rewards/frontier_ece_reward": 0.36155037879943847,
|
|
"rewards/frontier_entropy_batch_reward": -0.8836066842079162,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.320947265625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.23958333333333334,
|
|
"signal/accuracy_reward/group_std_mean": 0.3797557592391968,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.08333333507180214,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1604736328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1604736328125,
|
|
"signal/advantage_abs_mean": 0.2800030291080475,
|
|
"signal/advantage_pre_scale_abs_mean": 0.2800030291080475,
|
|
"signal/advantage_pre_scale_std": 0.35208467245101926,
|
|
"signal/advantage_std": 0.35208467245101926,
|
|
"signal/brier_reward/centered_abs_mean": 0.30012611150741575,
|
|
"signal/brier_reward/group_bin_occupancy": 0.6045138888888888,
|
|
"signal/brier_reward/group_std_mean": 0.3523735284805298,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03001261092722416,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03001261092722416,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.18756819367408753,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6347222222222222,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2359054923057556,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018756820634007455,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018756820634007455,
|
|
"signal/format_reward/centered_abs_mean": 0.1186360664665699,
|
|
"signal/format_reward/group_bin_occupancy": 0.215625,
|
|
"signal/format_reward/group_std_mean": 0.2005244880914688,
|
|
"signal/format_reward/group_zero_std_frac": 0.27500001192092893,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.05931803323328495,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.05931803323328495,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.3130494236946106,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.4802083333333334,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.36802791357040404,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.3130494236946106,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.4802083333333334,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.36802791357040404,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.3130494236946106,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.4802083333333334,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.36802791357040404,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3130494236946106,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.4802083333333334,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.36802791357040404,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3130494236946106,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.4802083333333334,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.36802791357040404,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3130494236946106,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.4802083333333334,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.36802791357040404,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3130494236946106,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.4802083333333334,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.36802791357040404,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.3130494236946106,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.4802083333333334,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.36802791357040404,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003913117619231343,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.3130494236946106,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.4802083333333334,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.36802791357040404,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.031304940953850745,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.031304940953850745,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19124604463577272,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3173611111111111,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2992013156414032,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.07500000167638063,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019124605879187583,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019124605879187583,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.46943229603801095,
|
|
"calibration/batch_distribution_entropy": 0.39007867356490084,
|
|
"calibration/batch_entropy_100bins": 0.395942880351314,
|
|
"calibration/batch_entropy_10bins": 0.39007867356490084,
|
|
"calibration/batch_entropy_50bins": 0.4590101472703655,
|
|
"calibration/batch_uniqueness": 0.5824014613826816,
|
|
"calibration/buffer_distribution_entropy": 0.3087817459654445,
|
|
"calibration/buffer_entropy_100bins": 0.3742379471182134,
|
|
"calibration/buffer_entropy_10bins": 0.3087817459654445,
|
|
"calibration/buffer_entropy_50bins": 0.4334464532283356,
|
|
"calibration/confidence_entropy": 0.3112217014467542,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.021409921671018274,
|
|
"calibration/coverage@30%": 0.042631853785900786,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.396358403667547,
|
|
"calibration/mean_confidence": 0.8837364281993014,
|
|
"calibration/prompt_uniqueness": 0.5032654060971099,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011111111111111117,
|
|
"completions/max_length": 3641.8,
|
|
"completions/max_terminated_length": 3641.8,
|
|
"completions/mean_length": 448.12864990234374,
|
|
"completions/mean_terminated_length": 453.1479919433594,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 81.4,
|
|
"epoch": 0.04799940000749991,
|
|
"grad_norm": 0.0010318111162632704,
|
|
"learning_rate": 2.380952380952381e-06,
|
|
"loss": -0.0095,
|
|
"num_tokens": 34037520.0,
|
|
"reward": 0.7606997966766358,
|
|
"reward_std": 0.268657323718071,
|
|
"rewards/accuracy_reward": 0.43125,
|
|
"rewards/brier_reward": 0.547679090499878,
|
|
"rewards/confidence_uniqueness_reward": 0.5864130258560181,
|
|
"rewards/format_reward": 0.9827256917953491,
|
|
"rewards/frontier_aurc_reward": 0.17052557989954947,
|
|
"rewards/frontier_coverage_0": 0.1809873386286199,
|
|
"rewards/frontier_coverage_1": 0.1809873386286199,
|
|
"rewards/frontier_coverage_10": 0.1809873386286199,
|
|
"rewards/frontier_coverage_15": 0.1809873386286199,
|
|
"rewards/frontier_coverage_20": 0.1809873386286199,
|
|
"rewards/frontier_coverage_25": 0.1809873386286199,
|
|
"rewards/frontier_coverage_5": 0.1809873386286199,
|
|
"rewards/frontier_ece_reward": 0.16180366985499858,
|
|
"rewards/frontier_entropy_batch_reward": -0.9384560346603393,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.3011718809604645,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.24305555555555558,
|
|
"signal/accuracy_reward/group_std_mean": 0.37031384706497195,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.055555556900799274,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.15058594048023224,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.15058594048023224,
|
|
"signal/advantage_abs_mean": 0.21566397547721863,
|
|
"signal/advantage_pre_scale_abs_mean": 0.21566397547721863,
|
|
"signal/advantage_pre_scale_std": 0.2766494989395142,
|
|
"signal/advantage_std": 0.2766494989395142,
|
|
"signal/brier_reward/centered_abs_mean": 0.26345101892948153,
|
|
"signal/brier_reward/group_bin_occupancy": 0.64375,
|
|
"signal/brier_reward/group_std_mean": 0.3199512481689453,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026345102488994597,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.026345102488994597,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.18284115493297576,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6225694444444445,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.220096555352211,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018284116685390473,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018284116685390473,
|
|
"signal/format_reward/centered_abs_mean": 0.03140733540058136,
|
|
"signal/format_reward/group_bin_occupancy": 0.16180555555555554,
|
|
"signal/format_reward/group_std_mean": 0.06625646576285363,
|
|
"signal/format_reward/group_zero_std_frac": 0.7055555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01570366770029068,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01570366770029068,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.11926614781841635,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.632986111111111,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.14433029675856232,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.001490826773806475,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.001490826773806475,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13713490664958955,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.6166666666666667,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17561094984412193,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017141862597782164,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017141862597782164,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13713490664958955,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.6166666666666667,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17561094984412193,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017141862597782164,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017141862597782164,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13713490664958955,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.6166666666666667,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17561094984412193,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017141862597782164,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017141862597782164,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13713490664958955,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.6166666666666667,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17561094984412193,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017141862597782164,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017141862597782164,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13713490664958955,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.6166666666666667,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.17561094984412193,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017141862597782164,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017141862597782164,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13713490664958955,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.6166666666666667,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.17561094984412193,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017141862597782164,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017141862597782164,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13713490664958955,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.6166666666666667,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17561094984412193,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017141862597782164,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017141862597782164,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.209702330827713,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.4791666666666667,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.25489507615566254,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02097023241221905,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02097023241221905,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10760662704706192,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.27881944444444445,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2045659214258194,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.2472222238779068,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010760662704706192,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010760662704706192,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.36227726815472583,
|
|
"calibration/batch_distribution_entropy": 0.5686667040639641,
|
|
"calibration/batch_entropy_100bins": 0.46015330431227275,
|
|
"calibration/batch_entropy_10bins": 0.5686667040639641,
|
|
"calibration/batch_entropy_50bins": 0.5345691542936407,
|
|
"calibration/batch_uniqueness": 0.7103172249744851,
|
|
"calibration/buffer_distribution_entropy": 0.3577538162654611,
|
|
"calibration/buffer_entropy_100bins": 0.3942461572372418,
|
|
"calibration/buffer_entropy_10bins": 0.3577538162654611,
|
|
"calibration/buffer_entropy_50bins": 0.45707372334068264,
|
|
"calibration/confidence_entropy": 0.38490834959662473,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.04647519582245431,
|
|
"calibration/coverage@20%": 0.1167582795107874,
|
|
"calibration/coverage@25%": 0.23117356053318674,
|
|
"calibration/coverage@30%": 0.4,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.24973699229306895,
|
|
"calibration/mean_confidence": 0.8392709913535077,
|
|
"calibration/prompt_uniqueness": 0.6169621770560031,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010850694444444442,
|
|
"completions/max_length": 3946.0,
|
|
"completions/max_terminated_length": 3946.0,
|
|
"completions/mean_length": 506.2393310546875,
|
|
"completions/mean_terminated_length": 511.80390625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 104.0,
|
|
"epoch": 0.05999925000937488,
|
|
"grad_norm": 0.0006832340732216835,
|
|
"learning_rate": 2.9761904761904763e-06,
|
|
"loss": -0.0061,
|
|
"num_tokens": 42993845.0,
|
|
"reward": 0.8102578759193421,
|
|
"reward_std": 0.21867357492446898,
|
|
"rewards/accuracy_reward": 0.5464409768581391,
|
|
"rewards/brier_reward": 0.6634011149406434,
|
|
"rewards/confidence_uniqueness_reward": 0.6947197318077087,
|
|
"rewards/format_reward": 0.9862847328186035,
|
|
"rewards/frontier_aurc_reward": -0.004134428594261408,
|
|
"rewards/frontier_coverage_0": 0.0019480698741972447,
|
|
"rewards/frontier_coverage_1": 0.0019480698741972447,
|
|
"rewards/frontier_coverage_10": 0.0019480698741972447,
|
|
"rewards/frontier_coverage_15": 0.0019480698741972447,
|
|
"rewards/frontier_coverage_20": 0.0019480698741972447,
|
|
"rewards/frontier_coverage_25": 0.0019480698741972447,
|
|
"rewards/frontier_coverage_5": 0.0019480698741972447,
|
|
"rewards/frontier_ece_reward": 0.022111652628518642,
|
|
"rewards/frontier_entropy_batch_reward": -0.9424700856208801,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.27006836533546447,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.23680555555555555,
|
|
"signal/accuracy_reward/group_std_mean": 0.3380339086055756,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.10555555671453476,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.13503418266773223,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.13503418266773223,
|
|
"signal/advantage_abs_mean": 0.17086843848228456,
|
|
"signal/advantage_pre_scale_abs_mean": 0.17086843848228456,
|
|
"signal/advantage_pre_scale_std": 0.2329561173915863,
|
|
"signal/advantage_std": 0.2329561173915863,
|
|
"signal/brier_reward/centered_abs_mean": 0.20691562294960023,
|
|
"signal/brier_reward/group_bin_occupancy": 0.7083333333333335,
|
|
"signal/brier_reward/group_std_mean": 0.25875782668590547,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020691563189029694,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020691563189029694,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1167220115661621,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6739583333333334,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.14770560711622238,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011672201752662658,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011672201752662658,
|
|
"signal/format_reward/centered_abs_mean": 0.024240451492369176,
|
|
"signal/format_reward/group_bin_occupancy": 0.14965277777777777,
|
|
"signal/format_reward/group_std_mean": 0.04735830463469028,
|
|
"signal/format_reward/group_zero_std_frac": 0.8027777910232544,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012120225746184588,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012120225746184588,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027726517990231516,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7083333333333334,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00418220111168921,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4658145887078716e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4658145887078716e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.04216930866241455,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.7506944444444444,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.06601626127958297,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0005271163769066334,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0005271163769066334,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.04216930866241455,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.7506944444444444,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.06601626127958297,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0005271163769066334,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0005271163769066334,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.04216930866241455,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.7506944444444444,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.06601626127958297,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0005271163769066334,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0005271163769066334,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.04216930866241455,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.7506944444444444,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.06601626127958297,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0005271163769066334,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0005271163769066334,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.04216930866241455,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.7506944444444444,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06601626127958297,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0005271163769066334,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0005271163769066334,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04216930866241455,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.7506944444444444,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06601626127958297,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0005271163769066334,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0005271163769066334,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.04216930866241455,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.7506944444444444,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.06601626127958297,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0005271163769066334,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0005271163769066334,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.11826423108577729,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6211805555555556,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.14597586840391158,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011826423183083534,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011826423183083534,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10026835501194001,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2576388888888889,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.20214761793613434,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.27777778208255766,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010026836022734641,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010026836022734641,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28030120111462953,
|
|
"calibration/batch_distribution_entropy": 0.6860934029668021,
|
|
"calibration/batch_entropy_100bins": 0.47509042724861394,
|
|
"calibration/batch_entropy_10bins": 0.6860934029668021,
|
|
"calibration/batch_entropy_50bins": 0.5571003799016714,
|
|
"calibration/batch_uniqueness": 0.7315819333475163,
|
|
"calibration/buffer_distribution_entropy": 0.44782185406696956,
|
|
"calibration/buffer_entropy_100bins": 0.432568095461576,
|
|
"calibration/buffer_entropy_10bins": 0.44782185406696956,
|
|
"calibration/buffer_entropy_50bins": 0.5016514349890427,
|
|
"calibration/confidence_entropy": 0.5032410205898324,
|
|
"calibration/coverage@0%": 0.0020942408376963353,
|
|
"calibration/coverage@1%": 0.0020942408376963353,
|
|
"calibration/coverage@10%": 0.019895287958115182,
|
|
"calibration/coverage@15%": 0.02722513089005236,
|
|
"calibration/coverage@20%": 0.13418440334318799,
|
|
"calibration/coverage@25%": 0.35231416191675474,
|
|
"calibration/coverage@30%": 0.7237647681307741,
|
|
"calibration/coverage@5%": 0.0020942408376963353,
|
|
"calibration/ece": 0.12329795557525347,
|
|
"calibration/mean_confidence": 0.7611700501923784,
|
|
"calibration/prompt_uniqueness": 0.6430085674820394,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01753472222222221,
|
|
"completions/max_length": 4004.2,
|
|
"completions/max_terminated_length": 4004.2,
|
|
"completions/mean_length": 587.9065307617187,
|
|
"completions/mean_terminated_length": 598.4256591796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 129.6,
|
|
"epoch": 0.07199910001124986,
|
|
"grad_norm": 0.0004425600345712155,
|
|
"learning_rate": 3.5714285714285718e-06,
|
|
"loss": -0.0096,
|
|
"num_tokens": 52876448.0,
|
|
"reward": 0.8403311133384704,
|
|
"reward_std": 0.19080861508846284,
|
|
"rewards/accuracy_reward": 0.5989583373069763,
|
|
"rewards/brier_reward": 0.7185598731040954,
|
|
"rewards/confidence_uniqueness_reward": 0.7107228398323059,
|
|
"rewards/format_reward": 0.9805555701255798,
|
|
"rewards/frontier_aurc_reward": -0.0033631839789450167,
|
|
"rewards/frontier_coverage_0": -0.010212953144218773,
|
|
"rewards/frontier_coverage_1": -0.010212953144218773,
|
|
"rewards/frontier_coverage_10": -0.010212953144218773,
|
|
"rewards/frontier_coverage_15": -0.010212953144218773,
|
|
"rewards/frontier_coverage_20": -0.010212953144218773,
|
|
"rewards/frontier_coverage_25": -0.010212953144218773,
|
|
"rewards/frontier_coverage_5": -0.010212953144218773,
|
|
"rewards/frontier_ece_reward": 0.026352604478597642,
|
|
"rewards/frontier_entropy_batch_reward": -0.9405368328094482,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.23267143666744233,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.225,
|
|
"signal/accuracy_reward/group_std_mean": 0.29561176896095276,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11633571833372117,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11633571833372117,
|
|
"signal/advantage_abs_mean": 0.1454018846154213,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1454018846154213,
|
|
"signal/advantage_pre_scale_std": 0.21304741203784944,
|
|
"signal/advantage_std": 0.21304741203784944,
|
|
"signal/brier_reward/centered_abs_mean": 0.16264356970787047,
|
|
"signal/brier_reward/group_bin_occupancy": 0.7659722222222223,
|
|
"signal/brier_reward/group_std_mean": 0.20704346001148224,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016264356672763824,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016264356672763824,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11230488270521163,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6600694444444444,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1442235678434372,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011230488680303097,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011230488680303097,
|
|
"signal/format_reward/centered_abs_mean": 0.03282335102558136,
|
|
"signal/format_reward/group_bin_occupancy": 0.15729166666666666,
|
|
"signal/format_reward/group_std_mean": 0.061944124102592465,
|
|
"signal/format_reward/group_zero_std_frac": 0.7416666626930237,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01641167551279068,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01641167551279068,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016978590982034802,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7225694444444445,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0026338005904108287,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1223240401013756e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1223240401013756e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.058112889528274536,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.0808319017291069,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0007264111656695605,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0007264111656695605,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.058112889528274536,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.0808319017291069,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0007264111656695605,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0007264111656695605,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.058112889528274536,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.0808319017291069,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0007264111656695605,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007264111656695605,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.058112889528274536,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0808319017291069,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007264111656695605,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007264111656695605,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.058112889528274536,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0808319017291069,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007264111656695605,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007264111656695605,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.058112889528274536,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0808319017291069,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007264111656695605,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007264111656695605,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.058112889528274536,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.0808319017291069,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0007264111656695605,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0007264111656695605,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.07377360388636589,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7083333333333334,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09336973130702972,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007377360574901104,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007377360574901104,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10462483614683152,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.2517361111111111,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2067155808210373,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.2888888955116272,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010462483763694764,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010462483763694764,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2571494357039368,
|
|
"calibration/batch_distribution_entropy": 0.6915737521952584,
|
|
"calibration/batch_entropy_100bins": 0.49647134799940157,
|
|
"calibration/batch_entropy_10bins": 0.6915737521952584,
|
|
"calibration/batch_entropy_50bins": 0.5702481285621201,
|
|
"calibration/batch_uniqueness": 0.7275370635688326,
|
|
"calibration/buffer_distribution_entropy": 0.5412420769857675,
|
|
"calibration/buffer_entropy_100bins": 0.47152686196123883,
|
|
"calibration/buffer_entropy_10bins": 0.5412420769857675,
|
|
"calibration/buffer_entropy_50bins": 0.5471484481600025,
|
|
"calibration/confidence_entropy": 0.5555159358951477,
|
|
"calibration/coverage@0%": 0.0021108982484691493,
|
|
"calibration/coverage@1%": 0.0021108982484691493,
|
|
"calibration/coverage@10%": 0.05757886667151095,
|
|
"calibration/coverage@15%": 0.10641635561934473,
|
|
"calibration/coverage@20%": 0.2736149765303272,
|
|
"calibration/coverage@25%": 0.5459915878765254,
|
|
"calibration/coverage@30%": 0.8,
|
|
"calibration/coverage@5%": 0.01375110988868079,
|
|
"calibration/ece": 0.09159402045781669,
|
|
"calibration/mean_confidence": 0.7156475196151146,
|
|
"calibration/prompt_uniqueness": 0.6363292257990881,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.019791666666666673,
|
|
"completions/max_length": 3775.4,
|
|
"completions/max_terminated_length": 3775.4,
|
|
"completions/mean_length": 639.6553955078125,
|
|
"completions/mean_terminated_length": 652.6179931640625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 195.2,
|
|
"epoch": 0.08399895001312484,
|
|
"grad_norm": 0.0005004682461731136,
|
|
"learning_rate": 4.166666666666667e-06,
|
|
"loss": -0.0115,
|
|
"num_tokens": 63322718.0,
|
|
"reward": 0.8644884347915649,
|
|
"reward_std": 0.17564767897129058,
|
|
"rewards/accuracy_reward": 0.6354166746139527,
|
|
"rewards/brier_reward": 0.7466939330101013,
|
|
"rewards/confidence_uniqueness_reward": 0.7118972659111023,
|
|
"rewards/format_reward": 0.9786458373069763,
|
|
"rewards/frontier_aurc_reward": -0.002773157227784395,
|
|
"rewards/frontier_coverage_0": -0.022431935556232928,
|
|
"rewards/frontier_coverage_1": -0.022431935556232928,
|
|
"rewards/frontier_coverage_10": -0.022431935556232928,
|
|
"rewards/frontier_coverage_15": -0.022431935556232928,
|
|
"rewards/frontier_coverage_20": -0.022431935556232928,
|
|
"rewards/frontier_coverage_25": -0.022431935556232928,
|
|
"rewards/frontier_coverage_5": -0.022431935556232928,
|
|
"rewards/frontier_ece_reward": 0.019234086759388445,
|
|
"rewards/frontier_entropy_batch_reward": -0.8832790136337281,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.20523003339767457,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21597222222222223,
|
|
"signal/accuracy_reward/group_std_mean": 0.26482152938842773,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.272222226858139,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10261501669883728,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10261501669883728,
|
|
"signal/advantage_abs_mean": 0.13137987852096558,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13137987852096558,
|
|
"signal/advantage_pre_scale_std": 0.1967354714870453,
|
|
"signal/advantage_std": 0.1967354714870453,
|
|
"signal/brier_reward/centered_abs_mean": 0.13508679270744323,
|
|
"signal/brier_reward/group_bin_occupancy": 0.804861111111111,
|
|
"signal/brier_reward/group_std_mean": 0.1748790144920349,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013508679158985615,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013508679158985615,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.132964688539505,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6645833333333334,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.16216650009155273,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.013296469673514366,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013296469673514366,
|
|
"signal/format_reward/centered_abs_mean": 0.03264973983168602,
|
|
"signal/format_reward/group_bin_occupancy": 0.15104166666666669,
|
|
"signal/format_reward/group_std_mean": 0.055517496168613435,
|
|
"signal/format_reward/group_zero_std_frac": 0.7916666865348816,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01632486991584301,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01632486991584301,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001194856408983469,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7513888888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0018525759922340512,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.4935705621610396e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.4935705621610396e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.07465948313474655,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8434027777777777,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.09974148273468017,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0009332435904070735,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0009332435904070735,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.07465948313474655,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8434027777777777,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.09974148273468017,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0009332435904070735,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0009332435904070735,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.07465948313474655,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8434027777777777,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.09974148273468017,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0009332435904070735,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009332435904070735,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07465948313474655,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8434027777777777,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09974148273468017,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009332435904070735,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009332435904070735,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07465948313474655,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8434027777777777,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09974148273468017,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009332435904070735,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009332435904070735,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07465948313474655,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8434027777777777,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09974148273468017,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009332435904070735,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009332435904070735,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.07465948313474655,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8434027777777777,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.09974148273468017,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0009332435904070735,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0009332435904070735,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.045828448981046675,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.5989583333333334,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.062764922529459,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00458284504711628,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00458284504711628,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19048750698566436,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.34965277777777776,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.31525389552116395,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.10000000093132258,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019048751518130302,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019048751518130302,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3167568910312298,
|
|
"calibration/batch_distribution_entropy": 0.8158753007650332,
|
|
"calibration/batch_entropy_100bins": 0.7754491287714476,
|
|
"calibration/batch_entropy_10bins": 0.8158753007650332,
|
|
"calibration/batch_entropy_50bins": 0.8143281422196861,
|
|
"calibration/batch_uniqueness": 0.8912077436325131,
|
|
"calibration/buffer_distribution_entropy": 0.6146759941471388,
|
|
"calibration/buffer_entropy_100bins": 0.523218368594508,
|
|
"calibration/buffer_entropy_10bins": 0.6146759941471388,
|
|
"calibration/buffer_entropy_50bins": 0.6006597501001077,
|
|
"calibration/confidence_entropy": 0.5871514805879432,
|
|
"calibration/coverage@0%": 0.009528019872847458,
|
|
"calibration/coverage@1%": 0.009528019872847458,
|
|
"calibration/coverage@10%": 0.012702623047450633,
|
|
"calibration/coverage@15%": 0.025002088288092344,
|
|
"calibration/coverage@20%": 0.07057350282502413,
|
|
"calibration/coverage@25%": 0.3320165297852925,
|
|
"calibration/coverage@30%": 0.4634608198502722,
|
|
"calibration/coverage@5%": 0.012702623047450633,
|
|
"calibration/ece": 0.14937205173281268,
|
|
"calibration/mean_confidence": 0.6103869655080439,
|
|
"calibration/prompt_uniqueness": 0.8211696893763782,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.020572916666666673,
|
|
"completions/max_length": 3653.4,
|
|
"completions/max_terminated_length": 3653.4,
|
|
"completions/mean_length": 677.6016479492188,
|
|
"completions/mean_terminated_length": 691.8317626953125,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 219.6,
|
|
"epoch": 0.09599880001499982,
|
|
"grad_norm": 0.0004252003855071962,
|
|
"learning_rate": 4.761904761904762e-06,
|
|
"loss": -0.0151,
|
|
"num_tokens": 74248209.0,
|
|
"reward": 0.9098719239234925,
|
|
"reward_std": 0.170660662651062,
|
|
"rewards/accuracy_reward": 0.6442708253860474,
|
|
"rewards/brier_reward": 0.7366751790046692,
|
|
"rewards/confidence_uniqueness_reward": 0.8783312201499939,
|
|
"rewards/format_reward": 0.977343738079071,
|
|
"rewards/frontier_aurc_reward": -0.0024696006905287502,
|
|
"rewards/frontier_coverage_0": -0.045524665340781215,
|
|
"rewards/frontier_coverage_1": -0.045524665340781215,
|
|
"rewards/frontier_coverage_10": -0.045524665340781215,
|
|
"rewards/frontier_coverage_15": -0.045524665340781215,
|
|
"rewards/frontier_coverage_20": -0.045524665340781215,
|
|
"rewards/frontier_coverage_25": -0.045524665340781215,
|
|
"rewards/frontier_coverage_5": -0.045524665340781215,
|
|
"rewards/frontier_ece_reward": 0.00879600141197443,
|
|
"rewards/frontier_entropy_batch_reward": -0.5930132269859314,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18386501371860503,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21423611111111113,
|
|
"signal/accuracy_reward/group_std_mean": 0.24782910346984863,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.286111119389534,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09193250685930252,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09193250685930252,
|
|
"signal/advantage_abs_mean": 0.125397390127182,
|
|
"signal/advantage_pre_scale_abs_mean": 0.125397390127182,
|
|
"signal/advantage_pre_scale_std": 0.19242337942123414,
|
|
"signal/advantage_std": 0.19242337942123414,
|
|
"signal/brier_reward/centered_abs_mean": 0.14977407157421113,
|
|
"signal/brier_reward/group_bin_occupancy": 0.88125,
|
|
"signal/brier_reward/group_std_mean": 0.19257045090198516,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014977407827973365,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014977407827973365,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07975448668003082,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7291666666666667,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.11011579483747483,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007975448574870824,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007975448574870824,
|
|
"signal/format_reward/centered_abs_mean": 0.03768988735973835,
|
|
"signal/format_reward/group_bin_occupancy": 0.15659722222222222,
|
|
"signal/format_reward/group_std_mean": 0.06584622710943222,
|
|
"signal/format_reward/group_zero_std_frac": 0.7472222328186036,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.018844943679869174,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.018844943679869174,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011158839566633104,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7399305555555556,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0017659143777564168,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.3948549531050958e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.3948549531050958e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13970998972654342,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8805555555555558,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18841452598571778,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017463748808950186,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017463748808950186,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13970998972654342,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8805555555555558,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18841452598571778,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017463748808950186,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017463748808950186,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13970998972654342,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8805555555555558,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18841452598571778,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017463748808950186,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017463748808950186,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13970998972654342,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8805555555555558,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18841452598571778,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017463748808950186,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017463748808950186,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13970998972654342,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8805555555555558,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.18841452598571778,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017463748808950186,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017463748808950186,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13970998972654342,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8805555555555558,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.18841452598571778,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017463748808950186,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017463748808950186,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13970998972654342,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8805555555555558,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18841452598571778,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017463748808950186,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017463748808950186,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03632725402712822,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.617361111111111,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.054545311629772185,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036327255424112082,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036327255424112082,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42244229912757875,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7017361111111111,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.49159557223320005,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04224423244595528,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04224423244595528,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2206993313754026,
|
|
"calibration/batch_distribution_entropy": 0.9724187710530586,
|
|
"calibration/batch_entropy_100bins": 0.9570917986236539,
|
|
"calibration/batch_entropy_10bins": 0.9724187710530586,
|
|
"calibration/batch_entropy_50bins": 0.9691055641857181,
|
|
"calibration/batch_uniqueness": 0.9515475678236415,
|
|
"calibration/buffer_distribution_entropy": 0.7119279342121769,
|
|
"calibration/buffer_entropy_100bins": 0.6250743755525399,
|
|
"calibration/buffer_entropy_10bins": 0.7119279342121769,
|
|
"calibration/buffer_entropy_50bins": 0.6950079249559582,
|
|
"calibration/confidence_entropy": 0.5458137036233696,
|
|
"calibration/coverage@0%": 0.020305716058574348,
|
|
"calibration/coverage@1%": 0.020305716058574348,
|
|
"calibration/coverage@10%": 0.057875053452992965,
|
|
"calibration/coverage@15%": 0.08067257367669875,
|
|
"calibration/coverage@20%": 0.3934809427991608,
|
|
"calibration/coverage@25%": 0.8858671787769413,
|
|
"calibration/coverage@30%": 0.9934959349593496,
|
|
"calibration/coverage@5%": 0.025541318152815184,
|
|
"calibration/ece": 0.24761169294312846,
|
|
"calibration/mean_confidence": 0.506569974973911,
|
|
"calibration/prompt_uniqueness": 0.8903720034084112,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.021180555555555557,
|
|
"completions/max_length": 3845.0,
|
|
"completions/max_terminated_length": 3845.0,
|
|
"completions/mean_length": 723.7879516601563,
|
|
"completions/mean_terminated_length": 739.548291015625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 221.6,
|
|
"epoch": 0.1079986500168748,
|
|
"grad_norm": 0.0005128175835125148,
|
|
"learning_rate": 4.909638554216868e-06,
|
|
"loss": -0.0162,
|
|
"num_tokens": 85721510.0,
|
|
"reward": 0.9443981885910034,
|
|
"reward_std": 0.16389898359775543,
|
|
"rewards/accuracy_reward": 0.6543402791023254,
|
|
"rewards/brier_reward": 0.6937033653259277,
|
|
"rewards/confidence_uniqueness_reward": 0.9300097227096558,
|
|
"rewards/format_reward": 0.978038203716278,
|
|
"rewards/frontier_aurc_reward": -0.002327358117327094,
|
|
"rewards/frontier_coverage_0": -0.0785758774727583,
|
|
"rewards/frontier_coverage_1": -0.0785758774727583,
|
|
"rewards/frontier_coverage_10": -0.0785758774727583,
|
|
"rewards/frontier_coverage_15": -0.0785758774727583,
|
|
"rewards/frontier_coverage_20": -0.0785758774727583,
|
|
"rewards/frontier_coverage_25": -0.0785758774727583,
|
|
"rewards/frontier_coverage_5": -0.0785758774727583,
|
|
"rewards/frontier_ece_reward": -0.0007223693886771798,
|
|
"rewards/frontier_entropy_batch_reward": -0.27185641825199125,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19038628339767455,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21388888888888888,
|
|
"signal/accuracy_reward/group_std_mean": 0.2518360376358032,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.28888889253139494,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09519314169883727,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09519314169883727,
|
|
"signal/advantage_abs_mean": 0.12383081167936325,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12383081167936325,
|
|
"signal/advantage_pre_scale_std": 0.1818700224161148,
|
|
"signal/advantage_std": 0.1818700224161148,
|
|
"signal/brier_reward/centered_abs_mean": 0.2113836646080017,
|
|
"signal/brier_reward/group_bin_occupancy": 0.9274305555555555,
|
|
"signal/brier_reward/group_std_mean": 0.2588895708322525,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02113836631178856,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02113836631178856,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.045432856678962706,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8055555555555556,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07177356258034706,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004543285816907883,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004543285816907883,
|
|
"signal/format_reward/centered_abs_mean": 0.03470594622194767,
|
|
"signal/format_reward/group_bin_occupancy": 0.15312499999999998,
|
|
"signal/format_reward/group_std_mean": 0.05929795354604721,
|
|
"signal/format_reward/group_zero_std_frac": 0.7750000119209289,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.017352973110973835,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.017352973110973835,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014846524223685264,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.673611111111111,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0024840928614139556,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.855815662565874e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.855815662565874e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.24903229475021363,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.9211805555555556,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.32121285796165466,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003112903609871864,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003112903609871864,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.24903229475021363,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.9211805555555556,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.32121285796165466,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003112903609871864,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003112903609871864,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.24903229475021363,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.9211805555555556,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.32121285796165466,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003112903609871864,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003112903609871864,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.24903229475021363,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.9211805555555556,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.32121285796165466,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003112903609871864,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003112903609871864,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.24903229475021363,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9211805555555556,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.32121285796165466,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003112903609871864,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003112903609871864,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.24903229475021363,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9211805555555556,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.32121285796165466,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003112903609871864,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003112903609871864,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.24903229475021363,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.9211805555555556,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.32121285796165466,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003112903609871864,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003112903609871864,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.07297626733779908,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.804513888888889,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09936150461435318,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0072976269759237765,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0072976269759237765,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35308589935302737,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7815972222222223,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4245911598205566,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03530859164893627,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03530859164893627,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.43490251490767023,
|
|
"calibration/batch_distribution_entropy": 0.9529251572760946,
|
|
"calibration/batch_entropy_100bins": 0.94042765491534,
|
|
"calibration/batch_entropy_10bins": 0.9529251572760946,
|
|
"calibration/batch_entropy_50bins": 0.952941110179381,
|
|
"calibration/batch_uniqueness": 0.9468997638508156,
|
|
"calibration/buffer_distribution_entropy": 0.772094451286123,
|
|
"calibration/buffer_entropy_100bins": 0.6983399485850001,
|
|
"calibration/buffer_entropy_10bins": 0.772094451286123,
|
|
"calibration/buffer_entropy_50bins": 0.7590855500965974,
|
|
"calibration/confidence_entropy": 0.4826112831276858,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.003825136612021858,
|
|
"calibration/coverage@20%": 0.01294977313373133,
|
|
"calibration/coverage@25%": 0.02212548072935199,
|
|
"calibration/coverage@30%": 0.032500629861073965,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.23091974604376944,
|
|
"calibration/mean_confidence": 0.6206757591253088,
|
|
"calibration/prompt_uniqueness": 0.8837748821431823,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01909722222222223,
|
|
"completions/max_length": 3686.6,
|
|
"completions/max_terminated_length": 3686.6,
|
|
"completions/mean_length": 757.1386474609375,
|
|
"completions/mean_terminated_length": 771.9427124023438,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 215.2,
|
|
"epoch": 0.11999850001874976,
|
|
"grad_norm": 0.0005383854149840772,
|
|
"learning_rate": 4.759036144578314e-06,
|
|
"loss": -0.016,
|
|
"num_tokens": 97541347.0,
|
|
"reward": 0.942762804031372,
|
|
"reward_std": 0.16731804311275483,
|
|
"rewards/accuracy_reward": 0.6445312619209289,
|
|
"rewards/brier_reward": 0.7177430987358093,
|
|
"rewards/confidence_uniqueness_reward": 0.9267112493515015,
|
|
"rewards/format_reward": 0.9802083373069763,
|
|
"rewards/frontier_aurc_reward": -0.0026557988487184046,
|
|
"rewards/frontier_coverage_0": -0.034297770075500014,
|
|
"rewards/frontier_coverage_1": -0.034297770075500014,
|
|
"rewards/frontier_coverage_10": -0.034297770075500014,
|
|
"rewards/frontier_coverage_15": -0.034297770075500014,
|
|
"rewards/frontier_coverage_20": -0.034297770075500014,
|
|
"rewards/frontier_coverage_25": -0.034297770075500014,
|
|
"rewards/frontier_coverage_5": -0.034297770075500014,
|
|
"rewards/frontier_ece_reward": 0.018495285883545876,
|
|
"rewards/frontier_entropy_batch_reward": -0.3286770522594452,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18459743857383729,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20972222222222223,
|
|
"signal/accuracy_reward/group_std_mean": 0.24165296256542207,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.32222222685813906,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09229871928691864,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09229871928691864,
|
|
"signal/advantage_abs_mean": 0.12547171711921692,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12547171711921692,
|
|
"signal/advantage_pre_scale_std": 0.1906561881303787,
|
|
"signal/advantage_std": 0.1906561881303787,
|
|
"signal/brier_reward/centered_abs_mean": 0.21231429576873778,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8871527777777779,
|
|
"signal/brier_reward/group_std_mean": 0.26225546598434446,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021231430768966674,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021231430768966674,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.046096354722976685,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7819444444444444,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07447160631418229,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004609635565429926,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004609635565429926,
|
|
"signal/format_reward/centered_abs_mean": 0.03169487938284874,
|
|
"signal/format_reward/group_bin_occupancy": 0.15381944444444443,
|
|
"signal/format_reward/group_std_mean": 0.05741401687264443,
|
|
"signal/format_reward/group_zero_std_frac": 0.769444465637207,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01584743969142437,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01584743969142437,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002861758507788181,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6586805555555555,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00452820798382163,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5771980765275654e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5771980765275654e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19227492213249206,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2632899612188339,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002403436554595828,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002403436554595828,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19227492213249206,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2632899612188339,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002403436554595828,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002403436554595828,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19227492213249206,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2632899612188339,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002403436554595828,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002403436554595828,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19227492213249206,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2632899612188339,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002403436554595828,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002403436554595828,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19227492213249206,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2632899612188339,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002403436554595828,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002403436554595828,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19227492213249206,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2632899612188339,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002403436554595828,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002403436554595828,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19227492213249206,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2632899612188339,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002403436554595828,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002403436554595828,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.08109527826309204,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8055555555555556,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.1046181559562683,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00810952829197049,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00810952829197049,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.389397132396698,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7958333333333334,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.45458305478096006,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03893971517682075,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03893971517682075,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.11999850001874976,
|
|
"eval_calibration/aurc": 0.2935445625829087,
|
|
"eval_calibration/batch_distribution_entropy": 0.8770594734217364,
|
|
"eval_calibration/batch_entropy_100bins": 0.6997554410803705,
|
|
"eval_calibration/batch_entropy_10bins": 0.8770594734217364,
|
|
"eval_calibration/batch_entropy_50bins": 0.7724054270082138,
|
|
"eval_calibration/batch_uniqueness": 0.8926113206729102,
|
|
"eval_calibration/buffer_distribution_entropy": 0.7915374005307516,
|
|
"eval_calibration/buffer_entropy_100bins": 0.7306963981499348,
|
|
"eval_calibration/buffer_entropy_10bins": 0.7915374005307516,
|
|
"eval_calibration/buffer_entropy_50bins": 0.7851080575149143,
|
|
"eval_calibration/confidence_entropy": 0.4853972869479209,
|
|
"eval_calibration/coverage@0%": 0.09122983870967742,
|
|
"eval_calibration/coverage@1%": 0.09122983870967742,
|
|
"eval_calibration/coverage@10%": 0.15036962365591397,
|
|
"eval_calibration/coverage@15%": 0.3385416666666667,
|
|
"eval_calibration/coverage@20%": 0.3923051075268817,
|
|
"eval_calibration/coverage@25%": 0.6832997311827956,
|
|
"eval_calibration/coverage@30%": 0.7689852150537635,
|
|
"eval_calibration/coverage@5%": 0.09122983870967742,
|
|
"eval_calibration/ece": 0.2806067776772616,
|
|
"eval_calibration/mean_confidence": 0.6447706628426046,
|
|
"eval_calibration/prompt_uniqueness": 0.8926113206729102,
|
|
"eval_completions/clipped_ratio": 0.028472222222222215,
|
|
"eval_completions/max_length": 2254.5,
|
|
"eval_completions/max_terminated_length": 2254.5,
|
|
"eval_completions/mean_length": 760.700185139974,
|
|
"eval_completions/mean_terminated_length": 782.921864827474,
|
|
"eval_completions/min_length": 0.0,
|
|
"eval_completions/min_terminated_length": 306.1666666666667,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 97541347.0,
|
|
"eval_reward": 0.8601760963598887,
|
|
"eval_reward_std": 0.2734912733236949,
|
|
"eval_rewards/accuracy_reward": 0.6284722288449606,
|
|
"eval_rewards/brier_reward": 0.7127746840318044,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8644578456878662,
|
|
"eval_rewards/format_reward": 0.972222218910853,
|
|
"eval_rewards/frontier_aurc_reward": -0.0026267794310115278,
|
|
"eval_rewards/frontier_coverage_0": -0.027196575111399095,
|
|
"eval_rewards/frontier_coverage_1": -0.027196575111399095,
|
|
"eval_rewards/frontier_coverage_10": -0.027196575111399095,
|
|
"eval_rewards/frontier_coverage_15": -0.027196575111399095,
|
|
"eval_rewards/frontier_coverage_20": -0.027196575111399095,
|
|
"eval_rewards/frontier_coverage_25": -0.027196575111399095,
|
|
"eval_rewards/frontier_coverage_5": -0.027196575111399095,
|
|
"eval_rewards/frontier_ece_reward": 0.01740353476877014,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.972222218910853,
|
|
"eval_runtime": 211.1601,
|
|
"eval_samples_per_second": 4.736,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4504123230775197,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.48123881717522937,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22520616153875986,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22520616153875986,
|
|
"eval_signal/advantage_abs_mean": 0.238955890138944,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.238955890138944,
|
|
"eval_signal/advantage_pre_scale_std": 0.27245956162611645,
|
|
"eval_signal/advantage_std": 0.27245956162611645,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.24741176019112268,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.9027777777777778,
|
|
"eval_signal/brier_reward/group_std_mean": 0.30227703352769214,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024741175894935925,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.024741175894935925,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07496882602572441,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.40625,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.13260164111852646,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007496882385263841,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007496882385263841,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.051974826492369175,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.19444444444444442,
|
|
"eval_signal/format_reward/group_std_mean": 0.11886927050848801,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.4444444502393405,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.025987413246184587,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.025987413246184587,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0029930932990585766,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7048611111111112,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005078944067160289,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.741366587443432e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.741366587443432e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2153101439277331,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.8958333333333334,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.32011035084724426,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026913767602915564,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026913767602915564,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.2153101439277331,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.8958333333333334,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.32011035084724426,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026913767602915564,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026913767602915564,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.2153101439277331,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.8958333333333334,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.32011035084724426,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026913767602915564,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026913767602915564,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.2153101439277331,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.8958333333333334,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.32011035084724426,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026913767602915564,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026913767602915564,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2153101439277331,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8958333333333334,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.32011035084724426,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026913767602915564,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026913767602915564,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2153101439277331,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.8958333333333334,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.32011035084724426,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026913767602915564,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026913767602915564,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.2153101439277331,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.8958333333333334,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.32011035084724426,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026913767602915564,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026913767602915564,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.07921455428004265,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8854166666666666,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.10567496220270793,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007921455971275767,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007921455971275767,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.051974826492369175,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.19444444444444442,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.11886927050848801,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.4444444502393405,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.00519748261043181,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.00519748261043181,
|
|
"eval_steps_per_second": 0.028,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29101160035536516,
|
|
"calibration/batch_distribution_entropy": 0.9611125544862056,
|
|
"calibration/batch_entropy_100bins": 0.9450097531606187,
|
|
"calibration/batch_entropy_10bins": 0.9611125544862056,
|
|
"calibration/batch_entropy_50bins": 0.9579487928522715,
|
|
"calibration/batch_uniqueness": 0.9479291763813198,
|
|
"calibration/buffer_distribution_entropy": 0.802567505648549,
|
|
"calibration/buffer_entropy_100bins": 0.7485174194719845,
|
|
"calibration/buffer_entropy_10bins": 0.802567505648549,
|
|
"calibration/buffer_entropy_50bins": 0.7990773865767944,
|
|
"calibration/confidence_entropy": 0.4802339646904608,
|
|
"calibration/coverage@0%": 0.005950499478157149,
|
|
"calibration/coverage@1%": 0.005950499478157149,
|
|
"calibration/coverage@10%": 0.05489786789920977,
|
|
"calibration/coverage@15%": 0.08344920618123151,
|
|
"calibration/coverage@20%": 0.38568360233253723,
|
|
"calibration/coverage@25%": 0.5033324927722896,
|
|
"calibration/coverage@30%": 0.5689476556018148,
|
|
"calibration/coverage@5%": 0.005950499478157149,
|
|
"calibration/ece": 0.19851863392301644,
|
|
"calibration/mean_confidence": 0.6001296239600306,
|
|
"calibration/prompt_uniqueness": 0.8830341006380135,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.020225694444444442,
|
|
"completions/max_length": 3700.8,
|
|
"completions/max_terminated_length": 3700.8,
|
|
"completions/mean_length": 826.671875,
|
|
"completions/mean_terminated_length": 844.00693359375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 215.4,
|
|
"epoch": 0.13199835002062474,
|
|
"grad_norm": 0.0003322226111777127,
|
|
"learning_rate": 4.60843373493976e-06,
|
|
"loss": -0.0163,
|
|
"num_tokens": 110145183.0,
|
|
"reward": 0.9478794574737549,
|
|
"reward_std": 0.1643240600824356,
|
|
"rewards/accuracy_reward": 0.6479166626930237,
|
|
"rewards/brier_reward": 0.7111706972122193,
|
|
"rewards/confidence_uniqueness_reward": 0.9300649881362915,
|
|
"rewards/format_reward": 0.978993046283722,
|
|
"rewards/frontier_aurc_reward": -0.00247355445753783,
|
|
"rewards/frontier_coverage_0": -0.04043981209397316,
|
|
"rewards/frontier_coverage_1": -0.04043981209397316,
|
|
"rewards/frontier_coverage_10": -0.04043981209397316,
|
|
"rewards/frontier_coverage_15": -0.04043981209397316,
|
|
"rewards/frontier_coverage_20": -0.04043981209397316,
|
|
"rewards/frontier_coverage_25": -0.04043981209397316,
|
|
"rewards/frontier_coverage_5": -0.04043981209397316,
|
|
"rewards/frontier_ece_reward": 0.013695706240832805,
|
|
"rewards/frontier_entropy_batch_reward": -0.27499137818813324,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1818793386220932,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20902777777777776,
|
|
"signal/accuracy_reward/group_std_mean": 0.2380914032459259,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3277777791023254,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0909396693110466,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0909396693110466,
|
|
"signal/advantage_abs_mean": 0.12402228713035583,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12402228713035583,
|
|
"signal/advantage_pre_scale_std": 0.18747871220111847,
|
|
"signal/advantage_std": 0.18747871220111847,
|
|
"signal/brier_reward/centered_abs_mean": 0.2243650496006012,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8930555555555555,
|
|
"signal/brier_reward/group_std_mean": 0.27434876561164856,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022436505928635598,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022436505928635598,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.044763144105672836,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7885416666666666,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0715868502855301,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004476314364001155,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004476314364001155,
|
|
"signal/format_reward/centered_abs_mean": 0.03343098945915699,
|
|
"signal/format_reward/group_bin_occupancy": 0.15243055555555557,
|
|
"signal/format_reward/group_std_mean": 0.05805426985025406,
|
|
"signal/format_reward/group_zero_std_frac": 0.7805555701255799,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016715494729578496,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.016715494729578496,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024474710691720246,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6569444444444444,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003879967099055648,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0593389601563105e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0593389601563105e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2272661030292511,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8708333333333333,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.30359464287757876,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002840826474130154,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002840826474130154,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2272661030292511,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8708333333333333,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.30359464287757876,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002840826474130154,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002840826474130154,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2272661030292511,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8708333333333333,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.30359464287757876,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002840826474130154,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002840826474130154,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2272661030292511,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8708333333333333,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.30359464287757876,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002840826474130154,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002840826474130154,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2272661030292511,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8708333333333333,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.30359464287757876,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002840826474130154,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002840826474130154,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2272661030292511,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8708333333333333,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.30359464287757876,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002840826474130154,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002840826474130154,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2272661030292511,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8708333333333333,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.30359464287757876,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002840826474130154,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002840826474130154,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.08030216246843339,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8336805555555555,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.10320238173007965,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00803021676838398,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00803021676838398,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3581114888191223,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7989583333333334,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.42964831590652464,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035811149328947064,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035811149328947064,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34571040020458177,
|
|
"calibration/batch_distribution_entropy": 0.9622518418951735,
|
|
"calibration/batch_entropy_100bins": 0.9519820743610523,
|
|
"calibration/batch_entropy_10bins": 0.9622518418951735,
|
|
"calibration/batch_entropy_50bins": 0.9639614098520457,
|
|
"calibration/batch_uniqueness": 0.9497552026076601,
|
|
"calibration/buffer_distribution_entropy": 0.8279265448356025,
|
|
"calibration/buffer_entropy_100bins": 0.7843073089436804,
|
|
"calibration/buffer_entropy_10bins": 0.8279265448356025,
|
|
"calibration/buffer_entropy_50bins": 0.8281794662889326,
|
|
"calibration/confidence_entropy": 0.517672969074423,
|
|
"calibration/coverage@0%": 0.005273351823406105,
|
|
"calibration/coverage@1%": 0.005273351823406105,
|
|
"calibration/coverage@10%": 0.04925240941502914,
|
|
"calibration/coverage@15%": 0.25197225967087444,
|
|
"calibration/coverage@20%": 0.2773683429293877,
|
|
"calibration/coverage@25%": 0.41644513658785354,
|
|
"calibration/coverage@30%": 0.43501219130263385,
|
|
"calibration/coverage@5%": 0.005273351823406105,
|
|
"calibration/ece": 0.2220912439346801,
|
|
"calibration/mean_confidence": 0.5937931918125188,
|
|
"calibration/prompt_uniqueness": 0.88741231383509,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.019791666666666652,
|
|
"completions/max_length": 3753.4,
|
|
"completions/max_terminated_length": 3753.4,
|
|
"completions/mean_length": 890.4375854492188,
|
|
"completions/mean_terminated_length": 908.44677734375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 269.2,
|
|
"epoch": 0.14399820002249972,
|
|
"grad_norm": 0.000333575124386698,
|
|
"learning_rate": 4.457831325301205e-06,
|
|
"loss": -0.0167,
|
|
"num_tokens": 123499600.0,
|
|
"reward": 0.9414217710494995,
|
|
"reward_std": 0.16816579103469848,
|
|
"rewards/accuracy_reward": 0.6327257037162781,
|
|
"rewards/brier_reward": 0.7177812337875367,
|
|
"rewards/confidence_uniqueness_reward": 0.9303562760353088,
|
|
"rewards/format_reward": 0.9795138835906982,
|
|
"rewards/frontier_aurc_reward": -0.002422581100836396,
|
|
"rewards/frontier_coverage_0": -0.03289339188486338,
|
|
"rewards/frontier_coverage_1": -0.03289339188486338,
|
|
"rewards/frontier_coverage_10": -0.03289339188486338,
|
|
"rewards/frontier_coverage_15": -0.03289339188486338,
|
|
"rewards/frontier_coverage_20": -0.03289339188486338,
|
|
"rewards/frontier_coverage_25": -0.03289339188486338,
|
|
"rewards/frontier_coverage_5": -0.03289339188486338,
|
|
"rewards/frontier_ece_reward": 0.011117698205634952,
|
|
"rewards/frontier_entropy_batch_reward": -0.27715103328227997,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1928331136703491,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21284722222222224,
|
|
"signal/accuracy_reward/group_std_mean": 0.25233509540557864,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.2972222208976746,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09641655683517455,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09641655683517455,
|
|
"signal/advantage_abs_mean": 0.12606564462184905,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12606564462184905,
|
|
"signal/advantage_pre_scale_std": 0.19068869948387146,
|
|
"signal/advantage_std": 0.19068869948387146,
|
|
"signal/brier_reward/centered_abs_mean": 0.20728689432144165,
|
|
"signal/brier_reward/group_bin_occupancy": 0.9072916666666666,
|
|
"signal/brier_reward/group_std_mean": 0.2543580114841461,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020728689804673194,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020728689804673194,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.043829741328954695,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7784722222222222,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07412301301956177,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004382974375039339,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004382974375039339,
|
|
"signal/format_reward/centered_abs_mean": 0.03404947929084301,
|
|
"signal/format_reward/group_bin_occupancy": 0.15694444444444444,
|
|
"signal/format_reward/group_std_mean": 0.06289056539535523,
|
|
"signal/format_reward/group_zero_std_frac": 0.7444444417953491,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.017024739645421505,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.017024739645421505,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021621018648147585,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6690972222222222,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0034721433650702236,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.702627461985685e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.702627461985685e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20764632821083068,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2768110573291779,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002595579205080867,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002595579205080867,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20764632821083068,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2768110573291779,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002595579205080867,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002595579205080867,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20764632821083068,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2768110573291779,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002595579205080867,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002595579205080867,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20764632821083068,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2768110573291779,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002595579205080867,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002595579205080867,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20764632821083068,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2768110573291779,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002595579205080867,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002595579205080867,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20764632821083068,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2768110573291779,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002595579205080867,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002595579205080867,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20764632821083068,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2768110573291779,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002595579205080867,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002595579205080867,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.069983871281147,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8329861111111111,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09274458438158036,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006998386885970831,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006998386885970831,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3481548845767975,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7982638888888889,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4152218818664551,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034815489500761035,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034815489500761035,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2649692233853358,
|
|
"calibration/batch_distribution_entropy": 0.9561995071075307,
|
|
"calibration/batch_entropy_100bins": 0.948571532521358,
|
|
"calibration/batch_entropy_10bins": 0.9561995071075307,
|
|
"calibration/batch_entropy_50bins": 0.9586481201565193,
|
|
"calibration/batch_uniqueness": 0.9481666389414347,
|
|
"calibration/buffer_distribution_entropy": 0.8478863193017032,
|
|
"calibration/buffer_entropy_100bins": 0.8120021970066679,
|
|
"calibration/buffer_entropy_10bins": 0.8478863193017032,
|
|
"calibration/buffer_entropy_50bins": 0.85018672179855,
|
|
"calibration/confidence_entropy": 0.5380890006629526,
|
|
"calibration/coverage@0%": 0.01272840150692083,
|
|
"calibration/coverage@1%": 0.01272840150692083,
|
|
"calibration/coverage@10%": 0.08449456058933413,
|
|
"calibration/coverage@15%": 0.17000585864866288,
|
|
"calibration/coverage@20%": 0.49943408426598157,
|
|
"calibration/coverage@25%": 0.6136174853877333,
|
|
"calibration/coverage@30%": 0.6983957219251338,
|
|
"calibration/coverage@5%": 0.015894628419849587,
|
|
"calibration/ece": 0.1826561762527292,
|
|
"calibration/mean_confidence": 0.5725721913547105,
|
|
"calibration/prompt_uniqueness": 0.8862852361567064,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017534722222222233,
|
|
"completions/max_length": 3875.2,
|
|
"completions/max_terminated_length": 3875.2,
|
|
"completions/mean_length": 922.3502563476562,
|
|
"completions/mean_terminated_length": 938.8082885742188,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 259.6,
|
|
"epoch": 0.1559980500243747,
|
|
"grad_norm": 0.0003632537554949522,
|
|
"learning_rate": 4.307228915662651e-06,
|
|
"loss": -0.0139,
|
|
"num_tokens": 137219123.0,
|
|
"reward": 0.9586760997772217,
|
|
"reward_std": 0.15382943153381348,
|
|
"rewards/accuracy_reward": 0.6605902791023255,
|
|
"rewards/brier_reward": 0.7354176759719848,
|
|
"rewards/confidence_uniqueness_reward": 0.9327790856361389,
|
|
"rewards/format_reward": 0.9817708253860473,
|
|
"rewards/frontier_aurc_reward": -0.002018653857521713,
|
|
"rewards/frontier_coverage_0": -0.03963281610049307,
|
|
"rewards/frontier_coverage_1": -0.03963281610049307,
|
|
"rewards/frontier_coverage_10": -0.03963281610049307,
|
|
"rewards/frontier_coverage_15": -0.03963281610049307,
|
|
"rewards/frontier_coverage_20": -0.03963281610049307,
|
|
"rewards/frontier_coverage_25": -0.03963281610049307,
|
|
"rewards/frontier_coverage_5": -0.03963281610049307,
|
|
"rewards/frontier_ece_reward": 0.010902080871164798,
|
|
"rewards/frontier_entropy_batch_reward": -0.26921272873878477,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16614583134651184,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20694444444444446,
|
|
"signal/accuracy_reward/group_std_mean": 0.2237447142601013,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3444444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08307291567325592,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08307291567325592,
|
|
"signal/advantage_abs_mean": 0.11447918117046356,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11447918117046356,
|
|
"signal/advantage_pre_scale_std": 0.17718282341957092,
|
|
"signal/advantage_std": 0.17718282341957092,
|
|
"signal/brier_reward/centered_abs_mean": 0.1887804687023163,
|
|
"signal/brier_reward/group_bin_occupancy": 0.9003472222222222,
|
|
"signal/brier_reward/group_std_mean": 0.23589398562908173,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018878047168254853,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018878047168254853,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03989965319633484,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7913194444444445,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06814380064606666,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003989965561777354,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003989965561777354,
|
|
"signal/format_reward/centered_abs_mean": 0.02988281212747097,
|
|
"signal/format_reward/group_bin_occupancy": 0.1545138888888889,
|
|
"signal/format_reward/group_std_mean": 0.05654868856072426,
|
|
"signal/format_reward/group_zero_std_frac": 0.7638888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014941406063735485,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.014941406063735485,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001725417748093605,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6888888888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0028228630777448415,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.156772206944879e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.156772206944879e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20168729424476622,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8833333333333332,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.26654154658317564,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002521091140806675,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002521091140806675,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20168729424476622,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8833333333333332,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26654154658317564,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002521091140806675,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002521091140806675,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20168729424476622,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8833333333333332,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26654154658317564,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002521091140806675,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002521091140806675,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20168729424476622,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8833333333333332,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26654154658317564,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002521091140806675,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002521091140806675,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20168729424476622,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8833333333333332,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.26654154658317564,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002521091140806675,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002521091140806675,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20168729424476622,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8833333333333332,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.26654154658317564,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002521091140806675,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002521091140806675,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20168729424476622,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8833333333333332,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.26654154658317564,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002521091140806675,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002521091140806675,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06174532324075699,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8194444444444444,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0827798992395401,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0061745323240756985,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0061745323240756985,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34277850985527036,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.784375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41563859581947327,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03427785262465477,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03427785262465477,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31782818807649266,
|
|
"calibration/batch_distribution_entropy": 0.9713867058593466,
|
|
"calibration/batch_entropy_100bins": 0.9582963574502891,
|
|
"calibration/batch_entropy_10bins": 0.9713867058593466,
|
|
"calibration/batch_entropy_50bins": 0.9697426974943962,
|
|
"calibration/batch_uniqueness": 0.9517092771582559,
|
|
"calibration/buffer_distribution_entropy": 0.865434024133957,
|
|
"calibration/buffer_entropy_100bins": 0.8346670094847349,
|
|
"calibration/buffer_entropy_10bins": 0.865434024133957,
|
|
"calibration/buffer_entropy_50bins": 0.8684468540776882,
|
|
"calibration/confidence_entropy": 0.5156155878893331,
|
|
"calibration/coverage@0%": 0.014809752256277365,
|
|
"calibration/coverage@1%": 0.014809752256277365,
|
|
"calibration/coverage@10%": 0.0197959018407649,
|
|
"calibration/coverage@15%": 0.06292728718017454,
|
|
"calibration/coverage@20%": 0.14343859835284306,
|
|
"calibration/coverage@25%": 0.23316434400479813,
|
|
"calibration/coverage@30%": 0.44321551191720443,
|
|
"calibration/coverage@5%": 0.01647180211777321,
|
|
"calibration/ece": 0.15868943196729363,
|
|
"calibration/mean_confidence": 0.5855180447606176,
|
|
"calibration/prompt_uniqueness": 0.8879748980284203,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.022395833333333327,
|
|
"completions/max_length": 3928.4,
|
|
"completions/max_terminated_length": 3928.4,
|
|
"completions/mean_length": 928.1142456054688,
|
|
"completions/mean_terminated_length": 949.37958984375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 232.8,
|
|
"epoch": 0.16799790002624967,
|
|
"grad_norm": 0.0003384539159014821,
|
|
"learning_rate": 4.156626506024097e-06,
|
|
"loss": -0.0189,
|
|
"num_tokens": 150989143.0,
|
|
"reward": 0.9484674572944641,
|
|
"reward_std": 0.15824552178382872,
|
|
"rewards/accuracy_reward": 0.6380208253860473,
|
|
"rewards/brier_reward": 0.7282225608825683,
|
|
"rewards/confidence_uniqueness_reward": 0.9304485440254211,
|
|
"rewards/format_reward": 0.9771701455116272,
|
|
"rewards/frontier_aurc_reward": -0.0020523122744634747,
|
|
"rewards/frontier_coverage_0": -0.02554969172924757,
|
|
"rewards/frontier_coverage_1": -0.02554969172924757,
|
|
"rewards/frontier_coverage_10": -0.02554969172924757,
|
|
"rewards/frontier_coverage_15": -0.02554969172924757,
|
|
"rewards/frontier_coverage_20": -0.02554969172924757,
|
|
"rewards/frontier_coverage_25": -0.02554969172924757,
|
|
"rewards/frontier_coverage_5": -0.02554969172924757,
|
|
"rewards/frontier_ece_reward": 0.012574984692037106,
|
|
"rewards/frontier_entropy_batch_reward": -0.23991408348083496,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17412109375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20763888888888887,
|
|
"signal/accuracy_reward/group_std_mean": 0.22977908849716186,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.33888890147209166,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.087060546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.087060546875,
|
|
"signal/advantage_abs_mean": 0.11712085604667663,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11712085604667663,
|
|
"signal/advantage_pre_scale_std": 0.1813085436820984,
|
|
"signal/advantage_std": 0.1813085436820984,
|
|
"signal/brier_reward/centered_abs_mean": 0.1955260753631592,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8989583333333334,
|
|
"signal/brier_reward/group_std_mean": 0.24234116375446318,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01955260746181011,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01955260746181011,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04565142020583153,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7760416666666666,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07568179368972779,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004565142141655087,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004565142141655087,
|
|
"signal/format_reward/centered_abs_mean": 0.0373426653444767,
|
|
"signal/format_reward/group_bin_occupancy": 0.15694444444444444,
|
|
"signal/format_reward/group_std_mean": 0.06606786623597145,
|
|
"signal/format_reward/group_zero_std_frac": 0.7444444417953491,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01867133267223835,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01867133267223835,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018319447292014957,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6729166666666666,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003036691714078188,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.289931035193149e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.289931035193149e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2180047571659088,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8857638888888889,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2845282912254333,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027250594459474085,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027250594459474085,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2180047571659088,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8857638888888889,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2845282912254333,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027250594459474085,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027250594459474085,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2180047571659088,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8857638888888889,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2845282912254333,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027250594459474085,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027250594459474085,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2180047571659088,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8857638888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2845282912254333,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027250594459474085,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027250594459474085,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2180047571659088,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8857638888888889,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2845282912254333,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027250594459474085,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027250594459474085,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2180047571659088,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8857638888888889,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2845282912254333,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027250594459474085,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027250594459474085,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2180047571659088,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8857638888888889,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2845282912254333,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027250594459474085,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027250594459474085,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06324872821569442,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8138888888888889,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08291356414556503,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0063248731195926665,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0063248731195926665,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3225190699100494,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7753472222222222,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3976904392242432,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03225190676748753,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03225190676748753,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23721066123861628,
|
|
"calibration/batch_distribution_entropy": 0.9523450697736399,
|
|
"calibration/batch_entropy_100bins": 0.9462079878353921,
|
|
"calibration/batch_entropy_10bins": 0.9523450697736399,
|
|
"calibration/batch_entropy_50bins": 0.9576539125810644,
|
|
"calibration/batch_uniqueness": 0.9472363243630035,
|
|
"calibration/buffer_distribution_entropy": 0.8763054419438709,
|
|
"calibration/buffer_entropy_100bins": 0.8520378003159907,
|
|
"calibration/buffer_entropy_10bins": 0.8763054419438709,
|
|
"calibration/buffer_entropy_50bins": 0.882087051862643,
|
|
"calibration/confidence_entropy": 0.5010399725323659,
|
|
"calibration/coverage@0%": 0.0074014091601529945,
|
|
"calibration/coverage@1%": 0.0074014091601529945,
|
|
"calibration/coverage@10%": 0.3165677151318652,
|
|
"calibration/coverage@15%": 0.3818958107864078,
|
|
"calibration/coverage@20%": 0.4402204226199299,
|
|
"calibration/coverage@25%": 0.5509094875204943,
|
|
"calibration/coverage@30%": 0.7209270657434209,
|
|
"calibration/coverage@5%": 0.09826302795910861,
|
|
"calibration/ece": 0.1836193855088879,
|
|
"calibration/mean_confidence": 0.6173291182052798,
|
|
"calibration/prompt_uniqueness": 0.8779982358091448,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014236111111111093,
|
|
"completions/max_length": 3881.4,
|
|
"completions/max_terminated_length": 3881.4,
|
|
"completions/mean_length": 888.530126953125,
|
|
"completions/mean_terminated_length": 901.3885498046875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 272.0,
|
|
"epoch": 0.17999775002812465,
|
|
"grad_norm": 0.0003190806892234832,
|
|
"learning_rate": 4.006024096385543e-06,
|
|
"loss": -0.0115,
|
|
"num_tokens": 164289906.0,
|
|
"reward": 0.9812763333320618,
|
|
"reward_std": 0.1455370843410492,
|
|
"rewards/accuracy_reward": 0.6936632037162781,
|
|
"rewards/brier_reward": 0.7690490484237671,
|
|
"rewards/confidence_uniqueness_reward": 0.9352475523948669,
|
|
"rewards/format_reward": 0.9855034828186036,
|
|
"rewards/frontier_aurc_reward": -0.0016912945546209812,
|
|
"rewards/frontier_coverage_0": -0.023174118530005217,
|
|
"rewards/frontier_coverage_1": -0.023174118530005217,
|
|
"rewards/frontier_coverage_10": -0.023174118530005217,
|
|
"rewards/frontier_coverage_15": -0.023174118530005217,
|
|
"rewards/frontier_coverage_20": -0.023174118530005217,
|
|
"rewards/frontier_coverage_25": -0.023174118530005217,
|
|
"rewards/frontier_coverage_5": -0.023174118530005217,
|
|
"rewards/frontier_ece_reward": 0.02147761546075344,
|
|
"rewards/frontier_entropy_batch_reward": -0.2883553385734558,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17032877504825591,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20451388888888888,
|
|
"signal/accuracy_reward/group_std_mean": 0.2250364065170288,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.36388888359069826,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08516438752412796,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08516438752412796,
|
|
"signal/advantage_abs_mean": 0.10705152153968811,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10705152153968811,
|
|
"signal/advantage_pre_scale_std": 0.17231981456279755,
|
|
"signal/advantage_std": 0.17231981456279755,
|
|
"signal/brier_reward/centered_abs_mean": 0.1728837013244629,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8597222222222222,
|
|
"signal/brier_reward/group_std_mean": 0.21893222033977508,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017288370057940483,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017288370057940483,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03745027519762516,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8152777777777779,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06325004473328591,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037450275383889677,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037450275383889677,
|
|
"signal/format_reward/centered_abs_mean": 0.02566731758415699,
|
|
"signal/format_reward/group_bin_occupancy": 0.15069444444444444,
|
|
"signal/format_reward/group_std_mean": 0.04961967393755913,
|
|
"signal/format_reward/group_zero_std_frac": 0.7944444417953491,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012833658792078494,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012833658792078494,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017779430374503136,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6729166666666666,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002878274582326412,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2224288841243833e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2224288841243833e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19454073309898376,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.85625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.26065097451210023,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002431759191676974,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002431759191676974,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19454073309898376,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.85625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26065097451210023,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002431759191676974,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002431759191676974,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19454073309898376,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.85625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26065097451210023,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002431759191676974,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002431759191676974,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19454073309898376,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.85625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26065097451210023,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002431759191676974,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002431759191676974,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19454073309898376,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.85625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.26065097451210023,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002431759191676974,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002431759191676974,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19454073309898376,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.85625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.26065097451210023,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002431759191676974,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002431759191676974,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19454073309898376,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.85625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.26065097451210023,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002431759191676974,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002431759191676974,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06184743866324425,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7597222222222222,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07970805168151855,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006184743903577328,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006184743903577328,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34849226474761963,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7798611111111111,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.41787471175193786,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034849225729703906,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034849225729703906,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2092779274411888,
|
|
"calibration/batch_distribution_entropy": 0.9617838823741687,
|
|
"calibration/batch_entropy_100bins": 0.950011756254557,
|
|
"calibration/batch_entropy_10bins": 0.9617838823741687,
|
|
"calibration/batch_entropy_50bins": 0.9600976364704195,
|
|
"calibration/batch_uniqueness": 0.9488157255909364,
|
|
"calibration/buffer_distribution_entropy": 0.8853217697278936,
|
|
"calibration/buffer_entropy_100bins": 0.8662557301452016,
|
|
"calibration/buffer_entropy_10bins": 0.8853217697278936,
|
|
"calibration/buffer_entropy_50bins": 0.8928601468050562,
|
|
"calibration/confidence_entropy": 0.4877562323287636,
|
|
"calibration/coverage@0%": 0.026958699293814493,
|
|
"calibration/coverage@1%": 0.026958699293814493,
|
|
"calibration/coverage@10%": 0.26216189051794814,
|
|
"calibration/coverage@15%": 0.533117630289133,
|
|
"calibration/coverage@20%": 0.5696959461704593,
|
|
"calibration/coverage@25%": 0.6466096767515254,
|
|
"calibration/coverage@30%": 0.7402521406135708,
|
|
"calibration/coverage@5%": 0.0507682231033383,
|
|
"calibration/ece": 0.20460225649868646,
|
|
"calibration/mean_confidence": 0.5915577899032075,
|
|
"calibration/prompt_uniqueness": 0.866834247942902,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01684027777777779,
|
|
"completions/max_length": 3994.8,
|
|
"completions/max_terminated_length": 3994.8,
|
|
"completions/mean_length": 904.8212768554688,
|
|
"completions/mean_terminated_length": 920.4162353515625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 252.8,
|
|
"epoch": 0.19199760002999963,
|
|
"grad_norm": 0.00035350897815078497,
|
|
"learning_rate": 3.855421686746989e-06,
|
|
"loss": -0.0139,
|
|
"num_tokens": 177766727.0,
|
|
"reward": 0.9613214373588562,
|
|
"reward_std": 0.1463531583547592,
|
|
"rewards/accuracy_reward": 0.6480034708976745,
|
|
"rewards/brier_reward": 0.7514761567115784,
|
|
"rewards/confidence_uniqueness_reward": 0.9352750539779663,
|
|
"rewards/format_reward": 0.9829861164093018,
|
|
"rewards/frontier_aurc_reward": -0.001857876474969089,
|
|
"rewards/frontier_coverage_0": -0.00018857438117265702,
|
|
"rewards/frontier_coverage_1": -0.00018857438117265702,
|
|
"rewards/frontier_coverage_10": -0.00018857438117265702,
|
|
"rewards/frontier_coverage_15": -0.00018857438117265702,
|
|
"rewards/frontier_coverage_20": -0.00018857438117265702,
|
|
"rewards/frontier_coverage_25": -0.00018857438117265702,
|
|
"rewards/frontier_coverage_5": -0.00018857438117265702,
|
|
"rewards/frontier_ece_reward": 0.018000571243464946,
|
|
"rewards/frontier_entropy_batch_reward": -0.24608825147151947,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17750108242034912,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20868055555555554,
|
|
"signal/accuracy_reward/group_std_mean": 0.2343847006559372,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3305555582046509,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08875054121017456,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08875054121017456,
|
|
"signal/advantage_abs_mean": 0.10952122509479523,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10952122509479523,
|
|
"signal/advantage_pre_scale_std": 0.16926259696483612,
|
|
"signal/advantage_std": 0.16926259696483612,
|
|
"signal/brier_reward/centered_abs_mean": 0.17829698026180268,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8697916666666667,
|
|
"signal/brier_reward/group_std_mean": 0.22418507933616638,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01782969757914543,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01782969757914543,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03583449199795723,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8104166666666666,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05993206053972244,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035834492649883033,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035834492649883033,
|
|
"signal/format_reward/centered_abs_mean": 0.02592230886220932,
|
|
"signal/format_reward/group_bin_occupancy": 0.15034722222222222,
|
|
"signal/format_reward/group_std_mean": 0.048324061557650566,
|
|
"signal/format_reward/group_zero_std_frac": 0.7972222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01296115443110466,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01296115443110466,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017847379669547081,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6881944444444444,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0027909183874726294,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2309225460048766e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2309225460048766e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2202287882566452,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8597222222222222,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2893898367881775,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027528597973287107,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027528597973287107,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2202287882566452,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8597222222222222,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2893898367881775,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027528597973287107,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027528597973287107,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2202287882566452,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8597222222222222,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2893898367881775,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027528597973287107,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027528597973287107,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2202287882566452,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8597222222222222,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2893898367881775,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027528597973287107,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027528597973287107,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2202287882566452,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8597222222222222,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2893898367881775,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027528597973287107,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027528597973287107,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2202287882566452,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8597222222222222,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2893898367881775,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027528597973287107,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027528597973287107,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2202287882566452,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8597222222222222,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2893898367881775,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027528597973287107,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027528597973287107,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.059304585307836534,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7579861111111111,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07622785717248917,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005930458568036557,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005930458568036557,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3187947154045105,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.773611111111111,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39364359378814695,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03187947124242783,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03187947124242783,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20534175522924727,
|
|
"calibration/batch_distribution_entropy": 0.969917266218302,
|
|
"calibration/batch_entropy_100bins": 0.952227152312498,
|
|
"calibration/batch_entropy_10bins": 0.969917266218302,
|
|
"calibration/batch_entropy_50bins": 0.9629528550247466,
|
|
"calibration/batch_uniqueness": 0.9497679294785953,
|
|
"calibration/buffer_distribution_entropy": 0.8954276213227133,
|
|
"calibration/buffer_entropy_100bins": 0.8788468049639422,
|
|
"calibration/buffer_entropy_10bins": 0.8954276213227133,
|
|
"calibration/buffer_entropy_50bins": 0.9029583142175056,
|
|
"calibration/confidence_entropy": 0.47593418558783557,
|
|
"calibration/coverage@0%": 0.01591605755730271,
|
|
"calibration/coverage@1%": 0.01591605755730271,
|
|
"calibration/coverage@10%": 0.13208137867099776,
|
|
"calibration/coverage@15%": 0.39615967842103883,
|
|
"calibration/coverage@20%": 0.5639172130983358,
|
|
"calibration/coverage@25%": 0.7288010871896423,
|
|
"calibration/coverage@30%": 0.8382724713232088,
|
|
"calibration/coverage@5%": 0.020728891781901636,
|
|
"calibration/ece": 0.15006383558851266,
|
|
"calibration/mean_confidence": 0.5456087756371643,
|
|
"calibration/prompt_uniqueness": 0.8688496853362396,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01293402777777779,
|
|
"completions/max_length": 3779.0,
|
|
"completions/max_terminated_length": 3779.0,
|
|
"completions/mean_length": 907.5528686523437,
|
|
"completions/mean_terminated_length": 919.5334716796875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 307.2,
|
|
"epoch": 0.2039974500318746,
|
|
"grad_norm": 0.00032232736703008413,
|
|
"learning_rate": 3.7048192771084342e-06,
|
|
"loss": -0.0112,
|
|
"num_tokens": 191308936.0,
|
|
"reward": 0.9826178789138794,
|
|
"reward_std": 0.14128702878952026,
|
|
"rewards/accuracy_reward": 0.6888020753860473,
|
|
"rewards/brier_reward": 0.7758087396621705,
|
|
"rewards/confidence_uniqueness_reward": 0.9368537425994873,
|
|
"rewards/format_reward": 0.986718761920929,
|
|
"rewards/frontier_aurc_reward": -0.0015183656942099333,
|
|
"rewards/frontier_coverage_0": -0.006013031769543886,
|
|
"rewards/frontier_coverage_1": -0.006013031769543886,
|
|
"rewards/frontier_coverage_10": -0.006013031769543886,
|
|
"rewards/frontier_coverage_15": -0.006013031769543886,
|
|
"rewards/frontier_coverage_20": -0.006013031769543886,
|
|
"rewards/frontier_coverage_25": -0.006013031769543886,
|
|
"rewards/frontier_coverage_5": -0.006013031769543886,
|
|
"rewards/frontier_ece_reward": 0.022591342404484748,
|
|
"rewards/frontier_entropy_batch_reward": -0.2812282383441925,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17081705629825591,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20590277777777777,
|
|
"signal/accuracy_reward/group_std_mean": 0.2259347140789032,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.35277777910232544,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08540852814912796,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08540852814912796,
|
|
"signal/advantage_abs_mean": 0.10153568834066391,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10153568834066391,
|
|
"signal/advantage_pre_scale_std": 0.16390545070171356,
|
|
"signal/advantage_std": 0.16390545070171356,
|
|
"signal/brier_reward/centered_abs_mean": 0.1676138609647751,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8409722222222221,
|
|
"signal/brier_reward/group_std_mean": 0.21286478340625764,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016761386021971702,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016761386021971702,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0346285417675972,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7895833333333333,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0630292072892189,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003462854353711009,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003462854353711009,
|
|
"signal/format_reward/centered_abs_mean": 0.02394205704331398,
|
|
"signal/format_reward/group_bin_occupancy": 0.15381944444444445,
|
|
"signal/format_reward/group_std_mean": 0.050843673199415206,
|
|
"signal/format_reward/group_zero_std_frac": 0.7694444417953491,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01197102852165699,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01197102852165699,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016425102250650526,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6753472222222222,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0026286729145795105,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.053137832263019e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.053137832263019e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21044284403324126,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8260416666666668,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2772384166717529,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026305356062948705,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026305356062948705,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21044284403324126,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8260416666666668,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2772384166717529,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026305356062948705,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026305356062948705,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21044284403324126,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8260416666666668,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2772384166717529,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026305356062948705,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026305356062948705,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21044284403324126,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8260416666666668,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2772384166717529,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026305356062948705,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026305356062948705,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21044284403324126,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8260416666666668,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2772384166717529,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026305356062948705,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026305356062948705,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21044284403324126,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8260416666666668,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2772384166717529,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026305356062948705,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026305356062948705,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21044284403324126,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8260416666666668,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2772384166717529,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026305356062948705,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026305356062948705,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05771494954824448,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7440972222222222,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07275837063789367,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005771494936197996,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005771494936197996,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3331539690494537,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7829861111111112,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40554880499839785,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0333153985440731,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0333153985440731,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.14146831607507013,
|
|
"calibration/batch_distribution_entropy": 0.9788194244148739,
|
|
"calibration/batch_entropy_100bins": 0.9608438585211108,
|
|
"calibration/batch_entropy_10bins": 0.9788194244148739,
|
|
"calibration/batch_entropy_50bins": 0.973365189284614,
|
|
"calibration/batch_uniqueness": 0.9523141972909981,
|
|
"calibration/buffer_distribution_entropy": 0.9043309546471272,
|
|
"calibration/buffer_entropy_100bins": 0.8901910730938212,
|
|
"calibration/buffer_entropy_10bins": 0.9043309546471272,
|
|
"calibration/buffer_entropy_50bins": 0.9122256900315284,
|
|
"calibration/confidence_entropy": 0.4913180920528314,
|
|
"calibration/coverage@0%": 0.0627593206678165,
|
|
"calibration/coverage@1%": 0.11067598733448314,
|
|
"calibration/coverage@10%": 0.41181837997686416,
|
|
"calibration/coverage@15%": 0.6135317774283345,
|
|
"calibration/coverage@20%": 0.7221224409632029,
|
|
"calibration/coverage@25%": 0.8329313631163793,
|
|
"calibration/coverage@30%": 0.9034970794433189,
|
|
"calibration/coverage@5%": 0.2677955394134996,
|
|
"calibration/ece": 0.18102057753858167,
|
|
"calibration/mean_confidence": 0.5288751475055655,
|
|
"calibration/prompt_uniqueness": 0.8661224631367853,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012586805555555558,
|
|
"completions/max_length": 3679.8,
|
|
"completions/max_terminated_length": 3679.8,
|
|
"completions/mean_length": 900.6781494140625,
|
|
"completions/mean_terminated_length": 912.1487060546875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 268.2,
|
|
"epoch": 0.2159973000337496,
|
|
"grad_norm": 0.00032432310399599373,
|
|
"learning_rate": 3.5542168674698798e-06,
|
|
"loss": -0.0095,
|
|
"num_tokens": 204753420.0,
|
|
"reward": 0.9828698992729187,
|
|
"reward_std": 0.13062580227851867,
|
|
"rewards/accuracy_reward": 0.6809027671813965,
|
|
"rewards/brier_reward": 0.7742850184440613,
|
|
"rewards/confidence_uniqueness_reward": 0.9397097945213317,
|
|
"rewards/format_reward": 0.9869791746139527,
|
|
"rewards/frontier_aurc_reward": -0.0012651005061343312,
|
|
"rewards/frontier_coverage_0": -0.0024590507615357637,
|
|
"rewards/frontier_coverage_1": -0.0024590507615357637,
|
|
"rewards/frontier_coverage_10": -0.0024590507615357637,
|
|
"rewards/frontier_coverage_15": -0.0024590507615357637,
|
|
"rewards/frontier_coverage_20": -0.0024590507615357637,
|
|
"rewards/frontier_coverage_25": -0.0024590507615357637,
|
|
"rewards/frontier_coverage_5": -0.0024590507615357637,
|
|
"rewards/frontier_ece_reward": 0.01990157924592495,
|
|
"rewards/frontier_entropy_batch_reward": -0.2422977238893509,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1625976547598839,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.2,
|
|
"signal/accuracy_reward/group_std_mean": 0.21232767403125763,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08129882737994194,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08129882737994194,
|
|
"signal/advantage_abs_mean": 0.0974230170249939,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0974230170249939,
|
|
"signal/advantage_pre_scale_std": 0.1547566443681717,
|
|
"signal/advantage_std": 0.1547566443681717,
|
|
"signal/brier_reward/centered_abs_mean": 0.16137183904647828,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8625,
|
|
"signal/brier_reward/group_std_mean": 0.20378568768501282,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01613718457520008,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01613718457520008,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03156536892056465,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.835763888888889,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05195377618074417,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003156536910682917,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003156536910682917,
|
|
"signal/format_reward/centered_abs_mean": 0.02133246473968029,
|
|
"signal/format_reward/group_bin_occupancy": 0.14513888888888887,
|
|
"signal/format_reward/group_std_mean": 0.03966722339391708,
|
|
"signal/format_reward/group_zero_std_frac": 0.8388888835906982,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010666232369840146,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010666232369840146,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012749084737151862,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6826388888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0020892760483548047,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.593635715835262e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.593635715835262e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.22104499042034148,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8520833333333332,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2860799193382263,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0027630624361336233,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027630624361336233,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22104499042034148,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8520833333333332,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2860799193382263,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027630624361336233,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027630624361336233,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22104499042034148,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8520833333333332,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2860799193382263,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027630624361336233,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027630624361336233,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22104499042034148,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8520833333333332,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2860799193382263,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027630624361336233,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027630624361336233,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22104499042034148,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8520833333333332,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2860799193382263,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027630624361336233,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027630624361336233,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22104499042034148,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8520833333333332,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2860799193382263,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027630624361336233,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027630624361336233,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22104499042034148,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8520833333333332,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2860799193382263,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027630624361336233,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027630624361336233,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05326760783791542,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7274305555555556,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06732185631990432,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005326761025935411,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005326761025935411,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31390817165374757,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7652777777777778,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38645762801170347,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03139082007110119,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03139082007110119,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16972575624076253,
|
|
"calibration/batch_distribution_entropy": 0.968708935722565,
|
|
"calibration/batch_entropy_100bins": 0.9597854119619518,
|
|
"calibration/batch_entropy_10bins": 0.968708935722565,
|
|
"calibration/batch_entropy_50bins": 0.9676675002930907,
|
|
"calibration/batch_uniqueness": 0.9505702036499282,
|
|
"calibration/buffer_distribution_entropy": 0.9138201935329576,
|
|
"calibration/buffer_entropy_100bins": 0.9005677377182005,
|
|
"calibration/buffer_entropy_10bins": 0.9138201935329576,
|
|
"calibration/buffer_entropy_50bins": 0.9210036485318283,
|
|
"calibration/confidence_entropy": 0.49206090349416975,
|
|
"calibration/coverage@0%": 0.03164220991292743,
|
|
"calibration/coverage@1%": 0.03164220991292743,
|
|
"calibration/coverage@10%": 0.49474073715796,
|
|
"calibration/coverage@15%": 0.6049408285432979,
|
|
"calibration/coverage@20%": 0.6640028793343454,
|
|
"calibration/coverage@25%": 0.712034532958265,
|
|
"calibration/coverage@30%": 0.7631684293337407,
|
|
"calibration/coverage@5%": 0.2814550861583458,
|
|
"calibration/ece": 0.1706881800323085,
|
|
"calibration/mean_confidence": 0.5701669649175536,
|
|
"calibration/prompt_uniqueness": 0.8606523882209853,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011805555555555559,
|
|
"completions/max_length": 3808.6,
|
|
"completions/max_terminated_length": 3808.6,
|
|
"completions/mean_length": 934.8341186523437,
|
|
"completions/mean_terminated_length": 946.0338989257813,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 274.8,
|
|
"epoch": 0.22799715003562457,
|
|
"grad_norm": 0.0003469188523013145,
|
|
"learning_rate": 3.4036144578313257e-06,
|
|
"loss": -0.0099,
|
|
"num_tokens": 218614389.0,
|
|
"reward": 0.9796857237815857,
|
|
"reward_std": 0.13000356405973434,
|
|
"rewards/accuracy_reward": 0.6736979126930237,
|
|
"rewards/brier_reward": 0.7816197514533997,
|
|
"rewards/confidence_uniqueness_reward": 0.9394507527351379,
|
|
"rewards/format_reward": 0.9880208253860474,
|
|
"rewards/frontier_aurc_reward": -0.0013940044911578298,
|
|
"rewards/frontier_coverage_0": 0.0035892575513571503,
|
|
"rewards/frontier_coverage_1": 0.0035892575513571503,
|
|
"rewards/frontier_coverage_10": 0.0035892575513571503,
|
|
"rewards/frontier_coverage_15": 0.0035892575513571503,
|
|
"rewards/frontier_coverage_20": 0.0035892575513571503,
|
|
"rewards/frontier_coverage_25": 0.0035892575513571503,
|
|
"rewards/frontier_coverage_5": 0.0035892575513571503,
|
|
"rewards/frontier_ece_reward": 0.02155197449028492,
|
|
"rewards/frontier_entropy_batch_reward": -0.25732521414756776,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15018988847732545,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19826388888888888,
|
|
"signal/accuracy_reward/group_std_mean": 0.20334359407424926,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4138889014720917,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07509494423866273,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07509494423866273,
|
|
"signal/advantage_abs_mean": 0.09411217570304871,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09411217570304871,
|
|
"signal/advantage_pre_scale_std": 0.15418358743190766,
|
|
"signal/advantage_std": 0.15418358743190766,
|
|
"signal/brier_reward/centered_abs_mean": 0.14829140901565552,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8458333333333334,
|
|
"signal/brier_reward/group_std_mean": 0.18972561955451966,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014829141087830067,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014829141087830067,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030963774770498276,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8368055555555556,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05300363451242447,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003096377523615956,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003096377523615956,
|
|
"signal/format_reward/centered_abs_mean": 0.02012803815305233,
|
|
"signal/format_reward/group_bin_occupancy": 0.14652777777777778,
|
|
"signal/format_reward/group_std_mean": 0.04024533927440643,
|
|
"signal/format_reward/group_zero_std_frac": 0.8277777791023254,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010064019076526164,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010064019076526164,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001483507757075131,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6753472222222222,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023828324396163226,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8543847727414686e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8543847727414686e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1866928219795227,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8479166666666667,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24621494710445405,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023336603306233885,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023336603306233885,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1866928219795227,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8479166666666667,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24621494710445405,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023336603306233885,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023336603306233885,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1866928219795227,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8479166666666667,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24621494710445405,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023336603306233885,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023336603306233885,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1866928219795227,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8479166666666667,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24621494710445405,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023336603306233885,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023336603306233885,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1866928219795227,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8479166666666667,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24621494710445405,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023336603306233885,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023336603306233885,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1866928219795227,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8479166666666667,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24621494710445405,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023336603306233885,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023336603306233885,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1866928219795227,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8479166666666667,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24621494710445405,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023336603306233885,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023336603306233885,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.049095044285058974,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7041666666666667,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.062035161256790164,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00490950457751751,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00490950457751751,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31598699688911436,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7607638888888889,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38732577562332154,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03159870021045208,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03159870021045208,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1468086902819562,
|
|
"calibration/batch_distribution_entropy": 0.9644228229125108,
|
|
"calibration/batch_entropy_100bins": 0.951640068404739,
|
|
"calibration/batch_entropy_10bins": 0.9644228229125108,
|
|
"calibration/batch_entropy_50bins": 0.9639704299471431,
|
|
"calibration/batch_uniqueness": 0.9486976020018496,
|
|
"calibration/buffer_distribution_entropy": 0.9181791650650444,
|
|
"calibration/buffer_entropy_100bins": 0.9078529948938329,
|
|
"calibration/buffer_entropy_10bins": 0.9181791650650444,
|
|
"calibration/buffer_entropy_50bins": 0.9265357528580778,
|
|
"calibration/confidence_entropy": 0.48844466923525676,
|
|
"calibration/coverage@0%": 0.059730258076577517,
|
|
"calibration/coverage@1%": 0.059730258076577517,
|
|
"calibration/coverage@10%": 0.44518814058922684,
|
|
"calibration/coverage@15%": 0.5958507215879982,
|
|
"calibration/coverage@20%": 0.7796840966967775,
|
|
"calibration/coverage@25%": 0.868777681347203,
|
|
"calibration/coverage@30%": 0.9167861409796894,
|
|
"calibration/coverage@5%": 0.14907013331665558,
|
|
"calibration/ece": 0.15105889999404112,
|
|
"calibration/mean_confidence": 0.5653978757621623,
|
|
"calibration/prompt_uniqueness": 0.865446176856419,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.017013888888888884,
|
|
"completions/max_length": 3786.0,
|
|
"completions/max_terminated_length": 3786.0,
|
|
"completions/mean_length": 975.0357666015625,
|
|
"completions/mean_terminated_length": 991.8107543945313,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 312.8,
|
|
"epoch": 0.23999700003749952,
|
|
"grad_norm": 0.0003270139859523624,
|
|
"learning_rate": 3.2530120481927713e-06,
|
|
"loss": -0.0123,
|
|
"num_tokens": 232945873.0,
|
|
"reward": 0.9823734283447265,
|
|
"reward_std": 0.1380382299423218,
|
|
"rewards/accuracy_reward": 0.6821180582046509,
|
|
"rewards/brier_reward": 0.8005537033081055,
|
|
"rewards/confidence_uniqueness_reward": 0.9328471422195435,
|
|
"rewards/format_reward": 0.982812511920929,
|
|
"rewards/frontier_aurc_reward": -0.001223186100833118,
|
|
"rewards/frontier_coverage_0": 0.022282357234507798,
|
|
"rewards/frontier_coverage_1": 0.022282357234507798,
|
|
"rewards/frontier_coverage_10": 0.022282357234507798,
|
|
"rewards/frontier_coverage_15": 0.022282357234507798,
|
|
"rewards/frontier_coverage_20": 0.022282357234507798,
|
|
"rewards/frontier_coverage_25": 0.022282357234507798,
|
|
"rewards/frontier_coverage_5": 0.022282357234507798,
|
|
"rewards/frontier_ece_reward": 0.024563415348529814,
|
|
"rewards/frontier_entropy_batch_reward": -0.2782270163297653,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16501736044883727,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20208333333333334,
|
|
"signal/accuracy_reward/group_std_mean": 0.21692525148391723,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38333333730697633,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08250868022441864,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08250868022441864,
|
|
"signal/advantage_abs_mean": 0.1007988765835762,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1007988765835762,
|
|
"signal/advantage_pre_scale_std": 0.16335872411727906,
|
|
"signal/advantage_std": 0.16335872411727906,
|
|
"signal/brier_reward/centered_abs_mean": 0.142218279838562,
|
|
"signal/brier_reward/group_bin_occupancy": 0.820486111111111,
|
|
"signal/brier_reward/group_std_mean": 0.18417510092258454,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014221827685832977,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014221827685832977,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03697417117655277,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8086805555555555,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06256948933005332,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003697417164221406,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003697417164221406,
|
|
"signal/format_reward/centered_abs_mean": 0.02630208320915699,
|
|
"signal/format_reward/group_bin_occupancy": 0.15069444444444446,
|
|
"signal/format_reward/group_std_mean": 0.05004433616995811,
|
|
"signal/format_reward/group_zero_std_frac": 0.794444453716278,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013151041604578495,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013151041604578495,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013734675711020827,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6788194444444444,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022251688642427325,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7168344675155824e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7168344675155824e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18763003647327423,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8336805555555555,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2453687906265259,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023453754372894766,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023453754372894766,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18763003647327423,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8336805555555555,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2453687906265259,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023453754372894766,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023453754372894766,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18763003647327423,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8336805555555555,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2453687906265259,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023453754372894766,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023453754372894766,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18763003647327423,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8336805555555555,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2453687906265259,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023453754372894766,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023453754372894766,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18763003647327423,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8336805555555555,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2453687906265259,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023453754372894766,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023453754372894766,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18763003647327423,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8336805555555555,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2453687906265259,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023453754372894766,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023453754372894766,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18763003647327423,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8336805555555555,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2453687906265259,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023453754372894766,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023453754372894766,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0460667222738266,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6913194444444444,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.058010222762823103,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0046066722832620146,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0046066722832620146,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3246371805667877,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7493055555555557,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3948457419872284,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032463718205690384,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032463718205690384,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.23999700003749952,
|
|
"eval_calibration/aurc": 0.1232624973726325,
|
|
"eval_calibration/batch_distribution_entropy": 0.8948085890484774,
|
|
"eval_calibration/batch_entropy_100bins": 0.7040956292401587,
|
|
"eval_calibration/batch_entropy_10bins": 0.8948085890484774,
|
|
"eval_calibration/batch_entropy_50bins": 0.7832705217175612,
|
|
"eval_calibration/batch_uniqueness": 0.8917035206382241,
|
|
"eval_calibration/buffer_distribution_entropy": 0.922497509059878,
|
|
"eval_calibration/buffer_entropy_100bins": 0.912813983946152,
|
|
"eval_calibration/buffer_entropy_10bins": 0.922497509059878,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9306076485338187,
|
|
"eval_calibration/confidence_entropy": 0.4828748758253563,
|
|
"eval_calibration/coverage@0%": 0.24378360215053765,
|
|
"eval_calibration/coverage@1%": 0.24378360215053765,
|
|
"eval_calibration/coverage@10%": 0.6001344086021505,
|
|
"eval_calibration/coverage@15%": 0.7688172043010754,
|
|
"eval_calibration/coverage@20%": 0.8741599462365591,
|
|
"eval_calibration/coverage@25%": 0.9321236559139785,
|
|
"eval_calibration/coverage@30%": 0.9895833333333334,
|
|
"eval_calibration/coverage@5%": 0.2921706989247312,
|
|
"eval_calibration/ece": 0.22508465745817566,
|
|
"eval_calibration/mean_confidence": 0.6127040160956362,
|
|
"eval_calibration/prompt_uniqueness": 0.8917035206382241,
|
|
"eval_completions/clipped_ratio": 0.010416666666666649,
|
|
"eval_completions/max_length": 2943.5,
|
|
"eval_completions/max_terminated_length": 2943.5,
|
|
"eval_completions/mean_length": 961.2862955729166,
|
|
"eval_completions/mean_terminated_length": 971.3759256998698,
|
|
"eval_completions/min_length": 71.0,
|
|
"eval_completions/min_terminated_length": 352.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 232945873.0,
|
|
"eval_reward": 0.9052010973294576,
|
|
"eval_reward_std": 0.23245403667291006,
|
|
"eval_rewards/accuracy_reward": 0.6796875,
|
|
"eval_rewards/brier_reward": 0.7887685497601827,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8840933938821157,
|
|
"eval_rewards/format_reward": 0.9878472089767456,
|
|
"eval_rewards/frontier_aurc_reward": -0.0014447161132314552,
|
|
"eval_rewards/frontier_coverage_0": 0.0102703048226734,
|
|
"eval_rewards/frontier_coverage_1": 0.0102703048226734,
|
|
"eval_rewards/frontier_coverage_10": 0.0102703048226734,
|
|
"eval_rewards/frontier_coverage_15": 0.0102703048226734,
|
|
"eval_rewards/frontier_coverage_20": 0.0102703048226734,
|
|
"eval_rewards/frontier_coverage_25": 0.0102703048226734,
|
|
"eval_rewards/frontier_coverage_5": 0.0102703048226734,
|
|
"eval_rewards/frontier_ece_reward": 0.020516497393449146,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9878472089767456,
|
|
"eval_runtime": 211.7743,
|
|
"eval_samples_per_second": 4.722,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4166124115387599,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4618180791536967,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20830620576937994,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20830620576937994,
|
|
"eval_signal/advantage_abs_mean": 0.19841948399941126,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.19841948399941126,
|
|
"eval_signal/advantage_pre_scale_std": 0.23134330163399378,
|
|
"eval_signal/advantage_std": 0.23134330163399378,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.19762666523456573,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.8819444444444445,
|
|
"eval_signal/brier_reward/group_std_mean": 0.25263360391060513,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01976266720642646,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01976266720642646,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05467540336151918,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3993055555555556,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09214186668395996,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005467540351673961,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005467540351673961,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.023328992693374555,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.16666666666666666,
|
|
"eval_signal/format_reward/group_std_mean": 0.06276767669866483,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.6666666915019354,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011664496346687278,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.011664496346687278,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002142787619959563,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6770833333333334,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0038993366761133075,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6784844218733877e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6784844218733877e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.26335882892211276,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9409722222222223,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.3692873766024907,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003291985446897646,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003291985446897646,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.26335882892211276,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9409722222222223,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.3692873766024907,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003291985446897646,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003291985446897646,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.26335882892211276,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9409722222222223,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.3692873766024907,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003291985446897646,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003291985446897646,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.26335882892211276,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9409722222222223,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.3692873766024907,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003291985446897646,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003291985446897646,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.26335882892211276,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9409722222222223,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.3692873766024907,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003291985446897646,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003291985446897646,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.26335882892211276,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9409722222222223,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.3692873766024907,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003291985446897646,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003291985446897646,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.26335882892211276,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9409722222222223,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.3692873766024907,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003291985446897646,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003291985446897646,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.05410987697541714,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9131944444444445,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.06782141576210658,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005410987806196014,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005410987806196014,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.023328992693374555,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.16666666666666666,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.06276767669866483,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6666666915019354,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.002332899389633288,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002332899389633288,
|
|
"eval_steps_per_second": 0.028,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2851160668444922,
|
|
"calibration/batch_distribution_entropy": 0.9671729214309087,
|
|
"calibration/batch_entropy_100bins": 0.9543574644103542,
|
|
"calibration/batch_entropy_10bins": 0.9671729214309087,
|
|
"calibration/batch_entropy_50bins": 0.9636134029781351,
|
|
"calibration/batch_uniqueness": 0.9495695699430952,
|
|
"calibration/buffer_distribution_entropy": 0.924664049380876,
|
|
"calibration/buffer_entropy_100bins": 0.915730900885403,
|
|
"calibration/buffer_entropy_10bins": 0.924664049380876,
|
|
"calibration/buffer_entropy_50bins": 0.9328724668779979,
|
|
"calibration/confidence_entropy": 0.49600341279363536,
|
|
"calibration/coverage@0%": 0.060966568007277976,
|
|
"calibration/coverage@1%": 0.08668835278418087,
|
|
"calibration/coverage@10%": 0.13970672548759294,
|
|
"calibration/coverage@15%": 0.26881728379574177,
|
|
"calibration/coverage@20%": 0.3661078923001498,
|
|
"calibration/coverage@25%": 0.4303547684019874,
|
|
"calibration/coverage@30%": 0.5357389720911219,
|
|
"calibration/coverage@5%": 0.12290882522512576,
|
|
"calibration/ece": 0.1526015826672329,
|
|
"calibration/mean_confidence": 0.5764491669243426,
|
|
"calibration/prompt_uniqueness": 0.861689626065415,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.020833333333333325,
|
|
"completions/max_length": 3851.2,
|
|
"completions/max_terminated_length": 3851.2,
|
|
"completions/mean_length": 966.7627685546875,
|
|
"completions/mean_terminated_length": 987.4223266601563,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 296.6,
|
|
"epoch": 0.2519968500393745,
|
|
"grad_norm": 0.0003603503864724189,
|
|
"learning_rate": 3.1024096385542172e-06,
|
|
"loss": -0.0174,
|
|
"num_tokens": 247159844.0,
|
|
"reward": 0.9762218475341797,
|
|
"reward_std": 0.13867290019989015,
|
|
"rewards/accuracy_reward": 0.6809895873069763,
|
|
"rewards/brier_reward": 0.7864728808403015,
|
|
"rewards/confidence_uniqueness_reward": 0.929021692276001,
|
|
"rewards/format_reward": 0.9790798664093018,
|
|
"rewards/frontier_aurc_reward": -0.0012768813758157194,
|
|
"rewards/frontier_coverage_0": 0.010393311083316804,
|
|
"rewards/frontier_coverage_1": 0.010393311083316804,
|
|
"rewards/frontier_coverage_10": 0.010393311083316804,
|
|
"rewards/frontier_coverage_15": 0.010393311083316804,
|
|
"rewards/frontier_coverage_20": 0.010393311083316804,
|
|
"rewards/frontier_coverage_25": 0.010393311083316804,
|
|
"rewards/frontier_coverage_5": 0.010393311083316804,
|
|
"rewards/frontier_ece_reward": 0.020966623350977896,
|
|
"rewards/frontier_entropy_batch_reward": -0.283524689078331,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1553656652569771,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20173611111111106,
|
|
"signal/accuracy_reward/group_std_mean": 0.210291787981987,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38611111640930174,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07768283262848855,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07768283262848855,
|
|
"signal/advantage_abs_mean": 0.10010195821523667,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10010195821523667,
|
|
"signal/advantage_pre_scale_std": 0.16518112421035766,
|
|
"signal/advantage_std": 0.16518112421035766,
|
|
"signal/brier_reward/centered_abs_mean": 0.14650782942771912,
|
|
"signal/brier_reward/group_bin_occupancy": 0.825,
|
|
"signal/brier_reward/group_std_mean": 0.18895745873451233,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014650783315300942,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014650783315300942,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04202488660812378,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8017361111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0678424745798111,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004202488483861089,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004202488483861089,
|
|
"signal/format_reward/centered_abs_mean": 0.03167860247194767,
|
|
"signal/format_reward/group_bin_occupancy": 0.15208333333333332,
|
|
"signal/format_reward/group_std_mean": 0.05567045882344246,
|
|
"signal/format_reward/group_zero_std_frac": 0.7833333611488342,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.015839301235973834,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.015839301235973834,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014219350181519986,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.671875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023063634755089877,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7774187836039345e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7774187836039345e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18595612347126006,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24536578357219696,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023244516225531696,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023244516225531696,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18595612347126006,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24536578357219696,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023244516225531696,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023244516225531696,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18595612347126006,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24536578357219696,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023244516225531696,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023244516225531696,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18595612347126006,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24536578357219696,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023244516225531696,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023244516225531696,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18595612347126006,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24536578357219696,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023244516225531696,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023244516225531696,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18595612347126006,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24536578357219696,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023244516225531696,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023244516225531696,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18595612347126006,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24536578357219696,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023244516225531696,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023244516225531696,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04364292547106743,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6725694444444444,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05495603755116463,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.002777777798473835,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004364292602986097,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004364292602986097,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32798747420310975,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7527777777777777,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39967008829116824,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03279874660074711,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03279874660074711,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1483170665199357,
|
|
"calibration/batch_distribution_entropy": 0.9387725486299472,
|
|
"calibration/batch_entropy_100bins": 0.9385953694946148,
|
|
"calibration/batch_entropy_10bins": 0.9387725486299472,
|
|
"calibration/batch_entropy_50bins": 0.947461516265483,
|
|
"calibration/batch_uniqueness": 0.9434469786987381,
|
|
"calibration/buffer_distribution_entropy": 0.9278483560380801,
|
|
"calibration/buffer_entropy_100bins": 0.9212316856289691,
|
|
"calibration/buffer_entropy_10bins": 0.9278483560380801,
|
|
"calibration/buffer_entropy_50bins": 0.9368451531102391,
|
|
"calibration/confidence_entropy": 0.4558293825239885,
|
|
"calibration/coverage@0%": 0.059290959926182385,
|
|
"calibration/coverage@1%": 0.059290959926182385,
|
|
"calibration/coverage@10%": 0.36697772902705544,
|
|
"calibration/coverage@15%": 0.5974388122980647,
|
|
"calibration/coverage@20%": 0.7156884661286804,
|
|
"calibration/coverage@25%": 0.8580991124280738,
|
|
"calibration/coverage@30%": 0.956633186340756,
|
|
"calibration/coverage@5%": 0.245650904176041,
|
|
"calibration/ece": 0.13348184934245935,
|
|
"calibration/mean_confidence": 0.607258804172381,
|
|
"calibration/prompt_uniqueness": 0.8473293014447923,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015538194444444441,
|
|
"completions/max_length": 3861.8,
|
|
"completions/max_terminated_length": 3861.8,
|
|
"completions/mean_length": 1001.0733642578125,
|
|
"completions/mean_terminated_length": 1016.8860473632812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 303.6,
|
|
"epoch": 0.2639967000412495,
|
|
"grad_norm": 0.00029901484958827496,
|
|
"learning_rate": 2.9518072289156627e-06,
|
|
"loss": -0.0131,
|
|
"num_tokens": 261800657.0,
|
|
"reward": 0.9921531319618225,
|
|
"reward_std": 0.13675991892814637,
|
|
"rewards/accuracy_reward": 0.7078125,
|
|
"rewards/brier_reward": 0.7964969754219056,
|
|
"rewards/confidence_uniqueness_reward": 0.9329366683959961,
|
|
"rewards/format_reward": 0.9843749880790711,
|
|
"rewards/frontier_aurc_reward": -0.0011116554378531873,
|
|
"rewards/frontier_coverage_0": 0.0021981429308652878,
|
|
"rewards/frontier_coverage_1": 0.0021981429308652878,
|
|
"rewards/frontier_coverage_10": 0.0021981429308652878,
|
|
"rewards/frontier_coverage_15": 0.0021981429308652878,
|
|
"rewards/frontier_coverage_20": 0.0021981429308652878,
|
|
"rewards/frontier_coverage_25": 0.0021981429308652878,
|
|
"rewards/frontier_coverage_5": 0.0021981429308652878,
|
|
"rewards/frontier_ece_reward": 0.02157861925661564,
|
|
"rewards/frontier_entropy_batch_reward": -0.2922031283378601,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16134982705116271,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20451388888888888,
|
|
"signal/accuracy_reward/group_std_mean": 0.21803545951843262,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3638888895511627,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08067491352558136,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08067491352558136,
|
|
"signal/advantage_abs_mean": 0.09815683215856552,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09815683215856552,
|
|
"signal/advantage_pre_scale_std": 0.16332031190395355,
|
|
"signal/advantage_std": 0.16332031190395355,
|
|
"signal/brier_reward/centered_abs_mean": 0.14194732010364533,
|
|
"signal/brier_reward/group_bin_occupancy": 0.80625,
|
|
"signal/brier_reward/group_std_mean": 0.18631795942783355,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014194732159376144,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014194732159376144,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03753194957971573,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0628928780555725,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037531950045377018,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037531950045377018,
|
|
"signal/format_reward/centered_abs_mean": 0.02554253451526165,
|
|
"signal/format_reward/group_bin_occupancy": 0.15034722222222224,
|
|
"signal/format_reward/group_std_mean": 0.04873799011111259,
|
|
"signal/format_reward/group_zero_std_frac": 0.7972222328186035,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012771267257630826,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012771267257630826,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013877948513254523,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.685763888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022338322829455136,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7347435641568153e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7347435641568153e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18613446354866028,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8159722222222221,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24784242510795593,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023266808595508335,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023266808595508335,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18613446354866028,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8159722222222221,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24784242510795593,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023266808595508335,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023266808595508335,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18613446354866028,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8159722222222221,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24784242510795593,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023266808595508335,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023266808595508335,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18613446354866028,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8159722222222221,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24784242510795593,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023266808595508335,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023266808595508335,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18613446354866028,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8159722222222221,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24784242510795593,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023266808595508335,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023266808595508335,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18613446354866028,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8159722222222221,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24784242510795593,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023266808595508335,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023266808595508335,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18613446354866028,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8159722222222221,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24784242510795593,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023266808595508335,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023266808595508335,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04411152824759483,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.671875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05458846464753151,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004411152843385935,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004411152843385935,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32641210556030276,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7527777777777778,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39622201919555666,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03264121115207672,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03264121115207672,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28311624676865715,
|
|
"calibration/batch_distribution_entropy": 0.9819046158374496,
|
|
"calibration/batch_entropy_100bins": 0.9621165215133629,
|
|
"calibration/batch_entropy_10bins": 0.9819046158374496,
|
|
"calibration/batch_entropy_50bins": 0.9711147626556473,
|
|
"calibration/batch_uniqueness": 0.9519674283199551,
|
|
"calibration/buffer_distribution_entropy": 0.9312139349056319,
|
|
"calibration/buffer_entropy_100bins": 0.9261870789929871,
|
|
"calibration/buffer_entropy_10bins": 0.9312139349056319,
|
|
"calibration/buffer_entropy_50bins": 0.9405647963056272,
|
|
"calibration/confidence_entropy": 0.5083362254533048,
|
|
"calibration/coverage@0%": 0.008015547191444707,
|
|
"calibration/coverage@1%": 0.008015547191444707,
|
|
"calibration/coverage@10%": 0.08324067443727931,
|
|
"calibration/coverage@15%": 0.3029031354053958,
|
|
"calibration/coverage@20%": 0.4227714752431936,
|
|
"calibration/coverage@25%": 0.5560391535985508,
|
|
"calibration/coverage@30%": 0.6467141986346506,
|
|
"calibration/coverage@5%": 0.008015547191444707,
|
|
"calibration/ece": 0.20618875522590968,
|
|
"calibration/mean_confidence": 0.5348565191388408,
|
|
"calibration/prompt_uniqueness": 0.8555982164395447,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.023524305555555534,
|
|
"completions/max_length": 3952.4,
|
|
"completions/max_terminated_length": 3952.4,
|
|
"completions/mean_length": 1017.3613525390625,
|
|
"completions/mean_terminated_length": 1042.0224365234376,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 300.4,
|
|
"epoch": 0.27599655004312446,
|
|
"grad_norm": 0.00028006560751236975,
|
|
"learning_rate": 2.8012048192771087e-06,
|
|
"loss": -0.0197,
|
|
"num_tokens": 276599860.0,
|
|
"reward": 0.9687199473381043,
|
|
"reward_std": 0.1485589861869812,
|
|
"rewards/accuracy_reward": 0.664843738079071,
|
|
"rewards/brier_reward": 0.7744507193565369,
|
|
"rewards/confidence_uniqueness_reward": 0.9282928586006165,
|
|
"rewards/format_reward": 0.9762152791023254,
|
|
"rewards/frontier_aurc_reward": -0.0013954649912193418,
|
|
"rewards/frontier_coverage_0": 0.012234875041758641,
|
|
"rewards/frontier_coverage_1": 0.012234875041758641,
|
|
"rewards/frontier_coverage_10": 0.012234875041758641,
|
|
"rewards/frontier_coverage_15": 0.012234875041758641,
|
|
"rewards/frontier_coverage_20": 0.012234875041758641,
|
|
"rewards/frontier_coverage_25": 0.012234875041758641,
|
|
"rewards/frontier_coverage_5": 0.012234875041758641,
|
|
"rewards/frontier_ece_reward": 0.015254579298198224,
|
|
"rewards/frontier_entropy_batch_reward": -0.2466247111558914,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16998155415058136,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.2041666666666667,
|
|
"signal/accuracy_reward/group_std_mean": 0.22179057002067565,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3666666686534882,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08499077707529068,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08499077707529068,
|
|
"signal/advantage_abs_mean": 0.10761507451534272,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10761507451534272,
|
|
"signal/advantage_pre_scale_std": 0.17629152834415435,
|
|
"signal/advantage_std": 0.17629152834415435,
|
|
"signal/brier_reward/centered_abs_mean": 0.1553166389465332,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8298611111111113,
|
|
"signal/brier_reward/group_std_mean": 0.20035399496555328,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015531663782894611,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015531663782894611,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04595714658498764,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7524305555555556,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07812547087669372,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004595714528113604,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004595714528113604,
|
|
"signal/format_reward/centered_abs_mean": 0.03736979141831398,
|
|
"signal/format_reward/group_bin_occupancy": 0.15868055555555555,
|
|
"signal/format_reward/group_std_mean": 0.06836798191070556,
|
|
"signal/format_reward/group_zero_std_frac": 0.7305555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01868489570915699,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01868489570915699,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014289145823568107,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6930555555555555,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002272111759521067,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7861432934296317e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7861432934296317e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19874320030212403,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8319444444444445,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.259122833609581,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024842898827046158,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024842898827046158,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19874320030212403,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8319444444444445,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.259122833609581,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024842898827046158,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024842898827046158,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19874320030212403,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8319444444444445,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.259122833609581,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024842898827046158,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024842898827046158,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19874320030212403,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8319444444444445,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.259122833609581,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024842898827046158,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024842898827046158,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19874320030212403,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8319444444444445,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.259122833609581,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024842898827046158,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024842898827046158,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19874320030212403,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8319444444444445,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.259122833609581,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024842898827046158,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024842898827046158,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19874320030212403,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8319444444444445,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.259122833609581,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024842898827046158,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024842898827046158,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.040270973742008206,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6878472222222223,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05081784054636955,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004027097299695015,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004027097299695015,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31121625900268557,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7447916666666666,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3846454739570618,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031121626123785974,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031121626123785974,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2595476812471177,
|
|
"calibration/batch_distribution_entropy": 0.962110799023046,
|
|
"calibration/batch_entropy_100bins": 0.955106701333807,
|
|
"calibration/batch_entropy_10bins": 0.962110799023046,
|
|
"calibration/batch_entropy_50bins": 0.9644720183778576,
|
|
"calibration/batch_uniqueness": 0.9499307501739891,
|
|
"calibration/buffer_distribution_entropy": 0.9355022742156173,
|
|
"calibration/buffer_entropy_100bins": 0.9312474850497697,
|
|
"calibration/buffer_entropy_10bins": 0.9355022742156173,
|
|
"calibration/buffer_entropy_50bins": 0.9447033893558565,
|
|
"calibration/confidence_entropy": 0.48475043392478445,
|
|
"calibration/coverage@0%": 0.011083498287003456,
|
|
"calibration/coverage@1%": 0.011083498287003456,
|
|
"calibration/coverage@10%": 0.3635380791890612,
|
|
"calibration/coverage@15%": 0.4410849483790093,
|
|
"calibration/coverage@20%": 0.5160072720239822,
|
|
"calibration/coverage@25%": 0.5471758299142111,
|
|
"calibration/coverage@30%": 0.5812770248801581,
|
|
"calibration/coverage@5%": 0.10752677005481348,
|
|
"calibration/ece": 0.1552948007514812,
|
|
"calibration/mean_confidence": 0.5953624452801337,
|
|
"calibration/prompt_uniqueness": 0.8642060771148008,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01796875,
|
|
"completions/max_length": 3848.0,
|
|
"completions/max_terminated_length": 3848.0,
|
|
"completions/mean_length": 1007.0320190429687,
|
|
"completions/mean_terminated_length": 1025.3716430664062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 343.6,
|
|
"epoch": 0.28799640004499943,
|
|
"grad_norm": 0.00025856465799733996,
|
|
"learning_rate": 2.6506024096385547e-06,
|
|
"loss": -0.0155,
|
|
"num_tokens": 291282725.0,
|
|
"reward": 0.978645408153534,
|
|
"reward_std": 0.13838702142238618,
|
|
"rewards/accuracy_reward": 0.6781249880790711,
|
|
"rewards/brier_reward": 0.7952387213706971,
|
|
"rewards/confidence_uniqueness_reward": 0.9324679374694824,
|
|
"rewards/format_reward": 0.98203125,
|
|
"rewards/frontier_aurc_reward": -0.0012951105483807624,
|
|
"rewards/frontier_coverage_0": 0.018445078120566904,
|
|
"rewards/frontier_coverage_1": 0.018445078120566904,
|
|
"rewards/frontier_coverage_10": 0.018445078120566904,
|
|
"rewards/frontier_coverage_15": 0.018445078120566904,
|
|
"rewards/frontier_coverage_20": 0.018445078120566904,
|
|
"rewards/frontier_coverage_25": 0.018445078120566904,
|
|
"rewards/frontier_coverage_5": 0.018445078120566904,
|
|
"rewards/frontier_ece_reward": 0.018771170079708098,
|
|
"rewards/frontier_entropy_batch_reward": -0.27678276896476744,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.16322699785232545,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20208333333333334,
|
|
"signal/accuracy_reward/group_std_mean": 0.21511903703212737,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3833333432674408,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08161349892616272,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08161349892616272,
|
|
"signal/advantage_abs_mean": 0.10217539519071579,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10217539519071579,
|
|
"signal/advantage_pre_scale_std": 0.16570760905742646,
|
|
"signal/advantage_std": 0.16570760905742646,
|
|
"signal/brier_reward/centered_abs_mean": 0.14061089158058165,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8267361111111111,
|
|
"signal/brier_reward/group_std_mean": 0.18166620433330535,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014061089418828488,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014061089418828488,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.038473252952098844,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8069444444444445,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.062386732548475266,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038473252672702072,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038473252672702072,
|
|
"signal/format_reward/centered_abs_mean": 0.02782660610973835,
|
|
"signal/format_reward/group_bin_occupancy": 0.14930555555555555,
|
|
"signal/format_reward/group_std_mean": 0.04968899041414261,
|
|
"signal/format_reward/group_zero_std_frac": 0.8055555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013913303054869175,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013913303054869175,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014574224362149835,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.690625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023057571612298488,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8217780234408564e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8217780234408564e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1772557020187378,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8270833333333332,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2352249562740326,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022156964056193828,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022156964056193828,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1772557020187378,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8270833333333332,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2352249562740326,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022156964056193828,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022156964056193828,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1772557020187378,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8270833333333332,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2352249562740326,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022156964056193828,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022156964056193828,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1772557020187378,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8270833333333332,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2352249562740326,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022156964056193828,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022156964056193828,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1772557020187378,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8270833333333332,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2352249562740326,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022156964056193828,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022156964056193828,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1772557020187378,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8270833333333332,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2352249562740326,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022156964056193828,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022156964056193828,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1772557020187378,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8270833333333332,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2352249562740326,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022156964056193828,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022156964056193828,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03890540599822998,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6711805555555557,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.048710108548402783,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0038905406836420298,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0038905406836420298,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3223755657672882,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7454861111111111,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39308597445487975,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03223755843937397,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03223755843937397,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.15175947975368032,
|
|
"calibration/batch_distribution_entropy": 0.9376645983193402,
|
|
"calibration/batch_entropy_100bins": 0.9415076330475506,
|
|
"calibration/batch_entropy_10bins": 0.9376645983193402,
|
|
"calibration/batch_entropy_50bins": 0.9499838957016375,
|
|
"calibration/batch_uniqueness": 0.9455843743770942,
|
|
"calibration/buffer_distribution_entropy": 0.9375868310371841,
|
|
"calibration/buffer_entropy_100bins": 0.9349363755430339,
|
|
"calibration/buffer_entropy_10bins": 0.9375868310371841,
|
|
"calibration/buffer_entropy_50bins": 0.9473378210710391,
|
|
"calibration/confidence_entropy": 0.5195516841707571,
|
|
"calibration/coverage@0%": 0.035550694075693494,
|
|
"calibration/coverage@1%": 0.035550694075693494,
|
|
"calibration/coverage@10%": 0.38478661557597077,
|
|
"calibration/coverage@15%": 0.5340862971466895,
|
|
"calibration/coverage@20%": 0.6252700952489321,
|
|
"calibration/coverage@25%": 0.8642236051281177,
|
|
"calibration/coverage@30%": 0.9356544982329478,
|
|
"calibration/coverage@5%": 0.24815686443079188,
|
|
"calibration/ece": 0.13946983254300122,
|
|
"calibration/mean_confidence": 0.6065808418851792,
|
|
"calibration/prompt_uniqueness": 0.8611937174457729,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.016579861111111115,
|
|
"completions/max_length": 4033.4,
|
|
"completions/max_terminated_length": 4033.4,
|
|
"completions/mean_length": 1023.8368286132812,
|
|
"completions/mean_terminated_length": 1041.2441528320312,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 312.0,
|
|
"epoch": 0.2999962500468744,
|
|
"grad_norm": 0.0002858802326954901,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": -0.0149,
|
|
"num_tokens": 306194989.0,
|
|
"reward": 0.9803131580352783,
|
|
"reward_std": 0.13643481433391572,
|
|
"rewards/accuracy_reward": 0.6823784708976746,
|
|
"rewards/brier_reward": 0.805714464187622,
|
|
"rewards/confidence_uniqueness_reward": 0.9327924966812133,
|
|
"rewards/format_reward": 0.9832465291023255,
|
|
"rewards/frontier_aurc_reward": -0.0010212866007350385,
|
|
"rewards/frontier_coverage_0": 0.016784844733774663,
|
|
"rewards/frontier_coverage_1": 0.016784844733774663,
|
|
"rewards/frontier_coverage_10": 0.016784844733774663,
|
|
"rewards/frontier_coverage_15": 0.016784844733774663,
|
|
"rewards/frontier_coverage_20": 0.016784844733774663,
|
|
"rewards/frontier_coverage_25": 0.016784844733774663,
|
|
"rewards/frontier_coverage_5": 0.016784844733774663,
|
|
"rewards/frontier_ece_reward": 0.016746819019317627,
|
|
"rewards/frontier_entropy_batch_reward": -0.29480605721473696,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15901150405406952,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19895833333333335,
|
|
"signal/accuracy_reward/group_std_mean": 0.20940764546394347,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40833333134651184,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07950575202703476,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07950575202703476,
|
|
"signal/advantage_abs_mean": 0.10095046162605285,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10095046162605285,
|
|
"signal/advantage_pre_scale_std": 0.16176269948482513,
|
|
"signal/advantage_std": 0.16176269948482513,
|
|
"signal/brier_reward/centered_abs_mean": 0.1315429389476776,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8329861111111111,
|
|
"signal/brier_reward/group_std_mean": 0.17215375006198883,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013154294155538083,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013154294155538083,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03707269802689552,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.815625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06010228767991066,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003707269812002778,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003707269812002778,
|
|
"signal/format_reward/centered_abs_mean": 0.02627495713531971,
|
|
"signal/format_reward/group_bin_occupancy": 0.14861111111111114,
|
|
"signal/format_reward/group_std_mean": 0.047227922827005386,
|
|
"signal/format_reward/group_zero_std_frac": 0.8111111044883728,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013137478567659854,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.013137478567659854,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011868951609358192,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6777777777777778,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0019993403926491736,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.4836190712230745e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.4836190712230745e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1744101345539093,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22863954305648804,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021801266819238664,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021801266819238664,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1744101345539093,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22863954305648804,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021801266819238664,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021801266819238664,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1744101345539093,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22863954305648804,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021801266819238664,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021801266819238664,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1744101345539093,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22863954305648804,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021801266819238664,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021801266819238664,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1744101345539093,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22863954305648804,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021801266819238664,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021801266819238664,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1744101345539093,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22863954305648804,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021801266819238664,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021801266819238664,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1744101345539093,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8472222222222221,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22863954305648804,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021801266819238664,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021801266819238664,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0351276122033596,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6805555555555556,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04449153020977974,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035127611830830575,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035127611830830575,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33887292742729186,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.751736111111111,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40577629804611204,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033887290954589845,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033887290954589845,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.189901764038449,
|
|
"calibration/batch_distribution_entropy": 0.9520721871515259,
|
|
"calibration/batch_entropy_100bins": 0.9454743219955299,
|
|
"calibration/batch_entropy_10bins": 0.9520721871515259,
|
|
"calibration/batch_entropy_50bins": 0.9555705289341347,
|
|
"calibration/batch_uniqueness": 0.9463924227699888,
|
|
"calibration/buffer_distribution_entropy": 0.9391385228864317,
|
|
"calibration/buffer_entropy_100bins": 0.9380882383339472,
|
|
"calibration/buffer_entropy_10bins": 0.9391385228864317,
|
|
"calibration/buffer_entropy_50bins": 0.9495040517659845,
|
|
"calibration/confidence_entropy": 0.47537128935550743,
|
|
"calibration/coverage@0%": 0.023279596393052857,
|
|
"calibration/coverage@1%": 0.023279596393052857,
|
|
"calibration/coverage@10%": 0.26430076626209964,
|
|
"calibration/coverage@15%": 0.4489382136469204,
|
|
"calibration/coverage@20%": 0.6147929671232751,
|
|
"calibration/coverage@25%": 0.7571622485068487,
|
|
"calibration/coverage@30%": 0.8580091429839086,
|
|
"calibration/coverage@5%": 0.14513451380368644,
|
|
"calibration/ece": 0.08148051823192345,
|
|
"calibration/mean_confidence": 0.5889737206246585,
|
|
"calibration/prompt_uniqueness": 0.8531980149590808,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02743055555555556,
|
|
"completions/max_length": 3838.8,
|
|
"completions/max_terminated_length": 3838.8,
|
|
"completions/mean_length": 1067.0234375,
|
|
"completions/mean_terminated_length": 1097.4159790039062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 281.8,
|
|
"epoch": 0.3119961000487494,
|
|
"grad_norm": 0.00029900847584940493,
|
|
"learning_rate": 2.349397590361446e-06,
|
|
"loss": -0.0208,
|
|
"num_tokens": 321611899.0,
|
|
"reward": 0.9622433066368103,
|
|
"reward_std": 0.14578649401664734,
|
|
"rewards/accuracy_reward": 0.6621527791023254,
|
|
"rewards/brier_reward": 0.7844874382019043,
|
|
"rewards/confidence_uniqueness_reward": 0.9220524430274963,
|
|
"rewards/format_reward": 0.9722222208976745,
|
|
"rewards/frontier_aurc_reward": -0.0012491632485762238,
|
|
"rewards/frontier_coverage_0": 0.018586619477719068,
|
|
"rewards/frontier_coverage_1": 0.018586619477719068,
|
|
"rewards/frontier_coverage_10": 0.018586619477719068,
|
|
"rewards/frontier_coverage_15": 0.018586619477719068,
|
|
"rewards/frontier_coverage_20": 0.018586619477719068,
|
|
"rewards/frontier_coverage_25": 0.018586619477719068,
|
|
"rewards/frontier_coverage_5": 0.018586619477719068,
|
|
"rewards/frontier_ece_reward": 0.015250921249389648,
|
|
"rewards/frontier_entropy_batch_reward": -0.287339860200882,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17348090708255767,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20381944444444447,
|
|
"signal/accuracy_reward/group_std_mean": 0.22585095167160035,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08674045354127884,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08674045354127884,
|
|
"signal/advantage_abs_mean": 0.11120370775461197,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11120370775461197,
|
|
"signal/advantage_pre_scale_std": 0.1740236759185791,
|
|
"signal/advantage_std": 0.1740236759185791,
|
|
"signal/brier_reward/centered_abs_mean": 0.14372893869876863,
|
|
"signal/brier_reward/group_bin_occupancy": 0.836111111111111,
|
|
"signal/brier_reward/group_std_mean": 0.18368545174598694,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01437289360910654,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01437289360910654,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04728544950485229,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8177083333333334,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06920376718044281,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004728544875979424,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004728544875979424,
|
|
"signal/format_reward/centered_abs_mean": 0.03650173675268888,
|
|
"signal/format_reward/group_bin_occupancy": 0.14930555555555555,
|
|
"signal/format_reward/group_std_mean": 0.056186852231621745,
|
|
"signal/format_reward/group_zero_std_frac": 0.8055555701255799,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01825086837634444,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01825086837634444,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001344931242056191,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6895833333333334,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0021691116970032455,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.68116404893226e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.68116404893226e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1911786049604416,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8354166666666666,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2510842025279999,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023897326085716487,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023897326085716487,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1911786049604416,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8354166666666666,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2510842025279999,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023897326085716487,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023897326085716487,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1911786049604416,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8354166666666666,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2510842025279999,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023897326085716487,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023897326085716487,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1911786049604416,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8354166666666666,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2510842025279999,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023897326085716487,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023897326085716487,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1911786049604416,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8354166666666666,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2510842025279999,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023897326085716487,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023897326085716487,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1911786049604416,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8354166666666666,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2510842025279999,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023897326085716487,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023897326085716487,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1911786049604416,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8354166666666666,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2510842025279999,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023897326085716487,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023897326085716487,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.035830476135015485,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6923611111111111,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04510410130023956,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003583047725260258,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003583047725260258,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32898640632629395,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7326388888888888,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39836318492889405,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03289864137768746,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03289864137768746,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1919401835649107,
|
|
"calibration/batch_distribution_entropy": 0.9360961840279008,
|
|
"calibration/batch_entropy_100bins": 0.9377248130148897,
|
|
"calibration/batch_entropy_10bins": 0.9360961840279008,
|
|
"calibration/batch_entropy_50bins": 0.9453320580648233,
|
|
"calibration/batch_uniqueness": 0.9424926809265267,
|
|
"calibration/buffer_distribution_entropy": 0.940487986587587,
|
|
"calibration/buffer_entropy_100bins": 0.9409743683693346,
|
|
"calibration/buffer_entropy_10bins": 0.940487986587587,
|
|
"calibration/buffer_entropy_50bins": 0.9514574364760401,
|
|
"calibration/confidence_entropy": 0.4645515195275509,
|
|
"calibration/coverage@0%": 0.05197618847080752,
|
|
"calibration/coverage@1%": 0.05197618847080752,
|
|
"calibration/coverage@10%": 0.33362163408868134,
|
|
"calibration/coverage@15%": 0.4458003166583123,
|
|
"calibration/coverage@20%": 0.5021032747158289,
|
|
"calibration/coverage@25%": 0.614310550618702,
|
|
"calibration/coverage@30%": 0.8264339062488768,
|
|
"calibration/coverage@5%": 0.2619767454497537,
|
|
"calibration/ece": 0.14291348187392,
|
|
"calibration/mean_confidence": 0.6251500991165597,
|
|
"calibration/prompt_uniqueness": 0.8484265296158569,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.022048611111111137,
|
|
"completions/max_length": 3967.2,
|
|
"completions/max_terminated_length": 3967.2,
|
|
"completions/mean_length": 1073.7470703125,
|
|
"completions/mean_terminated_length": 1097.9343017578126,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 290.8,
|
|
"epoch": 0.32399595005062437,
|
|
"grad_norm": 0.0003013814566656947,
|
|
"learning_rate": 2.1987951807228917e-06,
|
|
"loss": -0.0192,
|
|
"num_tokens": 337074489.0,
|
|
"reward": 0.9745335817337036,
|
|
"reward_std": 0.14610227048397065,
|
|
"rewards/accuracy_reward": 0.6752604246139526,
|
|
"rewards/brier_reward": 0.8024451017379761,
|
|
"rewards/confidence_uniqueness_reward": 0.9262871026992798,
|
|
"rewards/format_reward": 0.9777777791023254,
|
|
"rewards/frontier_aurc_reward": -0.0011367214610800147,
|
|
"rewards/frontier_coverage_0": 0.031039434671401977,
|
|
"rewards/frontier_coverage_1": 0.031039434671401977,
|
|
"rewards/frontier_coverage_10": 0.031039434671401977,
|
|
"rewards/frontier_coverage_15": 0.031039434671401977,
|
|
"rewards/frontier_coverage_20": 0.031039434671401977,
|
|
"rewards/frontier_coverage_25": 0.031039434671401977,
|
|
"rewards/frontier_coverage_5": 0.031039434671401977,
|
|
"rewards/frontier_ece_reward": 0.019231295213103294,
|
|
"rewards/frontier_entropy_batch_reward": -0.2948362112045288,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.163134765625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20451388888888888,
|
|
"signal/accuracy_reward/group_std_mean": 0.21833232939243316,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3638888895511627,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0815673828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0815673828125,
|
|
"signal/advantage_abs_mean": 0.10507439076900482,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10507439076900482,
|
|
"signal/advantage_pre_scale_std": 0.17383444905281067,
|
|
"signal/advantage_std": 0.17383444905281067,
|
|
"signal/brier_reward/centered_abs_mean": 0.1440066486597061,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8013888888888889,
|
|
"signal/brier_reward/group_std_mean": 0.18859705626964568,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014400665648281574,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014400665648281574,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04786202013492584,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7704861111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07885360568761826,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004786202218383551,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004786202218383551,
|
|
"signal/format_reward/centered_abs_mean": 0.0367078997194767,
|
|
"signal/format_reward/group_bin_occupancy": 0.15694444444444444,
|
|
"signal/format_reward/group_std_mean": 0.0658931627869606,
|
|
"signal/format_reward/group_zero_std_frac": 0.7444444417953491,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01835394985973835,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01835394985973835,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014502544421702624,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6881944444444444,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002394520537927747,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8128181181964464e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8128181181964464e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18389809429645537,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8100694444444445,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2444453001022339,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022987262811511753,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022987262811511753,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18389809429645537,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8100694444444445,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2444453001022339,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022987262811511753,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022987262811511753,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18389809429645537,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8100694444444445,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2444453001022339,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022987262811511753,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022987262811511753,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18389809429645537,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8100694444444445,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2444453001022339,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022987262811511753,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022987262811511753,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18389809429645537,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8100694444444445,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2444453001022339,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022987262811511753,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022987262811511753,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18389809429645537,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8100694444444445,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2444453001022339,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022987262811511753,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022987262811511753,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18389809429645537,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8100694444444445,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2444453001022339,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022987262811511753,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022987262811511753,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03592751622200012,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.679861111111111,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04412141665816307,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003592751733958721,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003592751733958721,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3293720781803131,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7475694444444445,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.398487514257431,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03293720856308937,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03293720856308937,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.12348685325559985,
|
|
"calibration/batch_distribution_entropy": 0.9452073614807169,
|
|
"calibration/batch_entropy_100bins": 0.9462193369534104,
|
|
"calibration/batch_entropy_10bins": 0.9452073614807169,
|
|
"calibration/batch_entropy_50bins": 0.9547051944481133,
|
|
"calibration/batch_uniqueness": 0.9456989720112439,
|
|
"calibration/buffer_distribution_entropy": 0.9442883870516695,
|
|
"calibration/buffer_entropy_100bins": 0.9466428389842558,
|
|
"calibration/buffer_entropy_10bins": 0.9442883870516695,
|
|
"calibration/buffer_entropy_50bins": 0.9555018867627533,
|
|
"calibration/confidence_entropy": 0.4717980803303433,
|
|
"calibration/coverage@0%": 0.05851068594704365,
|
|
"calibration/coverage@1%": 0.05851068594704365,
|
|
"calibration/coverage@10%": 0.4795897983929943,
|
|
"calibration/coverage@15%": 0.7379147049577794,
|
|
"calibration/coverage@20%": 0.822024690104827,
|
|
"calibration/coverage@25%": 0.9007581452775323,
|
|
"calibration/coverage@30%": 0.9805801689932363,
|
|
"calibration/coverage@5%": 0.21591114286610366,
|
|
"calibration/ece": 0.13830600383937194,
|
|
"calibration/mean_confidence": 0.6107551203312859,
|
|
"calibration/prompt_uniqueness": 0.8445896458858178,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02256944444444444,
|
|
"completions/max_length": 3882.6,
|
|
"completions/max_terminated_length": 3882.6,
|
|
"completions/mean_length": 1070.8292358398437,
|
|
"completions/mean_terminated_length": 1095.48896484375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 360.4,
|
|
"epoch": 0.33599580005249935,
|
|
"grad_norm": 0.00026106671430170536,
|
|
"learning_rate": 2.0481927710843377e-06,
|
|
"loss": -0.0181,
|
|
"num_tokens": 352514666.0,
|
|
"reward": 0.9759422659873962,
|
|
"reward_std": 0.13966879844665528,
|
|
"rewards/accuracy_reward": 0.6809027910232544,
|
|
"rewards/brier_reward": 0.7947320222854615,
|
|
"rewards/confidence_uniqueness_reward": 0.927030086517334,
|
|
"rewards/format_reward": 0.9770833492279053,
|
|
"rewards/frontier_aurc_reward": -0.0010663935798220336,
|
|
"rewards/frontier_coverage_0": 0.01920067030005157,
|
|
"rewards/frontier_coverage_1": 0.01920067030005157,
|
|
"rewards/frontier_coverage_10": 0.01920067030005157,
|
|
"rewards/frontier_coverage_15": 0.01920067030005157,
|
|
"rewards/frontier_coverage_20": 0.01920067030005157,
|
|
"rewards/frontier_coverage_25": 0.01920067030005157,
|
|
"rewards/frontier_coverage_5": 0.01920067030005157,
|
|
"rewards/frontier_ece_reward": 0.014913215488195419,
|
|
"rewards/frontier_entropy_batch_reward": -0.28385027050971984,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15207248330116271,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19965277777777776,
|
|
"signal/accuracy_reward/group_std_mean": 0.2055516541004181,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07603624165058136,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07603624165058136,
|
|
"signal/advantage_abs_mean": 0.10126451849937439,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10126451849937439,
|
|
"signal/advantage_pre_scale_std": 0.16675151288509368,
|
|
"signal/advantage_std": 0.16675151288509368,
|
|
"signal/brier_reward/centered_abs_mean": 0.14222416579723357,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8145833333333332,
|
|
"signal/brier_reward/group_std_mean": 0.18435073792934417,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014222417026758194,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014222417026758194,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.043567462265491484,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7763888888888889,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07227423414587975,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00435674637556076,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00435674637556076,
|
|
"signal/format_reward/centered_abs_mean": 0.03331163227558136,
|
|
"signal/format_reward/group_bin_occupancy": 0.15555555555555556,
|
|
"signal/format_reward/group_std_mean": 0.06039545834064484,
|
|
"signal/format_reward/group_zero_std_frac": 0.7555555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01665581613779068,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01665581613779068,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013383281184360385,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7003472222222221,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002201914181932807,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.672910220804624e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.672910220804624e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1825083911418915,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8243055555555555,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24250001609325408,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022813548799604177,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022813548799604177,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1825083911418915,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8243055555555555,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24250001609325408,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022813548799604177,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022813548799604177,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1825083911418915,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8243055555555555,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24250001609325408,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022813548799604177,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022813548799604177,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1825083911418915,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8243055555555555,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24250001609325408,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022813548799604177,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022813548799604177,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1825083911418915,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8243055555555555,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24250001609325408,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022813548799604177,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022813548799604177,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1825083911418915,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8243055555555555,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24250001609325408,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022813548799604177,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022813548799604177,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1825083911418915,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8243055555555555,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24250001609325408,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022813548799604177,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022813548799604177,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03229107595980167,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6559027777777777,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04013029932975769,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003229107800871134,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003229107800871134,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3295544445514679,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.751736111111111,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3994203209877014,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03295544609427452,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03295544609427452,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16963840721934928,
|
|
"calibration/batch_distribution_entropy": 0.9839276501014798,
|
|
"calibration/batch_entropy_100bins": 0.9645983079691083,
|
|
"calibration/batch_entropy_10bins": 0.9839276501014798,
|
|
"calibration/batch_entropy_50bins": 0.9761114279031353,
|
|
"calibration/batch_uniqueness": 0.9537371663907928,
|
|
"calibration/buffer_distribution_entropy": 0.9547933136371283,
|
|
"calibration/buffer_entropy_100bins": 0.9584469083864826,
|
|
"calibration/buffer_entropy_10bins": 0.9547933136371283,
|
|
"calibration/buffer_entropy_50bins": 0.9646369849007005,
|
|
"calibration/confidence_entropy": 0.4893747832641259,
|
|
"calibration/coverage@0%": 0.07835068493854391,
|
|
"calibration/coverage@1%": 0.13061735160521057,
|
|
"calibration/coverage@10%": 0.3460718360292577,
|
|
"calibration/coverage@15%": 0.49166136873047694,
|
|
"calibration/coverage@20%": 0.6608290070486337,
|
|
"calibration/coverage@25%": 0.7759027731944482,
|
|
"calibration/coverage@30%": 0.835348769127411,
|
|
"calibration/coverage@5%": 0.17083356782142678,
|
|
"calibration/ece": 0.12891986027646224,
|
|
"calibration/mean_confidence": 0.531905021969054,
|
|
"calibration/prompt_uniqueness": 0.8497211296793032,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.02022569444444442,
|
|
"completions/max_length": 3858.6,
|
|
"completions/max_terminated_length": 3858.6,
|
|
"completions/mean_length": 1056.7715454101562,
|
|
"completions/mean_terminated_length": 1078.7728881835938,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 340.4,
|
|
"epoch": 0.34799565005437433,
|
|
"grad_norm": 0.0002779490314424038,
|
|
"learning_rate": 1.8975903614457832e-06,
|
|
"loss": -0.0175,
|
|
"num_tokens": 367753282.0,
|
|
"reward": 0.9878696322441101,
|
|
"reward_std": 0.13212112337350845,
|
|
"rewards/accuracy_reward": 0.6973958134651184,
|
|
"rewards/brier_reward": 0.798538327217102,
|
|
"rewards/confidence_uniqueness_reward": 0.931287407875061,
|
|
"rewards/format_reward": 0.9796875,
|
|
"rewards/frontier_aurc_reward": -0.0010055402875877918,
|
|
"rewards/frontier_coverage_0": 0.01543128564953804,
|
|
"rewards/frontier_coverage_1": 0.01543128564953804,
|
|
"rewards/frontier_coverage_10": 0.01543128564953804,
|
|
"rewards/frontier_coverage_15": 0.01543128564953804,
|
|
"rewards/frontier_coverage_20": 0.01543128564953804,
|
|
"rewards/frontier_coverage_25": 0.01778712384402752,
|
|
"rewards/frontier_coverage_5": 0.01543128564953804,
|
|
"rewards/frontier_ece_reward": 0.009699131641536952,
|
|
"rewards/frontier_entropy_batch_reward": -0.2599165678024292,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14628906548023224,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19895833333333335,
|
|
"signal/accuracy_reward/group_std_mean": 0.19899411499500275,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07314453274011612,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07314453274011612,
|
|
"signal/advantage_abs_mean": 0.0954915538430214,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0954915538430214,
|
|
"signal/advantage_pre_scale_std": 0.16041628420352935,
|
|
"signal/advantage_std": 0.16041628420352935,
|
|
"signal/brier_reward/centered_abs_mean": 0.13802383542060853,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/brier_reward/group_std_mean": 0.17916561365127565,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01380238328129053,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01380238328129053,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.041919562965631485,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8003472222222221,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06718875169754028,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00419195624999702,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00419195624999702,
|
|
"signal/format_reward/centered_abs_mean": 0.03240017406642437,
|
|
"signal/format_reward/group_bin_occupancy": 0.15208333333333335,
|
|
"signal/format_reward/group_std_mean": 0.05611613690853119,
|
|
"signal/format_reward/group_zero_std_frac": 0.7833333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.016200087033212185,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.016200087033212185,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001244223420508206,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6961805555555556,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002067322516813874,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.555279395688558e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.555279395688558e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18761368095874786,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.809375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24638648331165314,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023451711051166056,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023451711051166056,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18761368095874786,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.809375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24638648331165314,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023451711051166056,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023451711051166056,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18761368095874786,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.809375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24638648331165314,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023451711051166056,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023451711051166056,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18761368095874786,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.809375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24638648331165314,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023451711051166056,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023451711051166056,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18761368095874786,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.809375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24638648331165314,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023451711051166056,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023451711051166056,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16378130316734313,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.803125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.21585616767406463,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020472663221880794,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020472663221880794,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18761368095874786,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.809375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24638648331165314,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023451711051166056,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023451711051166056,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02758239060640335,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.648611111111111,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03442221805453301,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0027582390699535607,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0027582390699535607,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31868948936462405,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7479166666666667,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38932323455810547,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03186894841492176,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03186894841492176,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.14581408637367196,
|
|
"calibration/batch_distribution_entropy": 0.9620467147080662,
|
|
"calibration/batch_entropy_100bins": 0.9521593826962524,
|
|
"calibration/batch_entropy_10bins": 0.9620467147080662,
|
|
"calibration/batch_entropy_50bins": 0.9628477325770243,
|
|
"calibration/batch_uniqueness": 0.9493508453284983,
|
|
"calibration/buffer_distribution_entropy": 0.964346395839352,
|
|
"calibration/buffer_entropy_100bins": 0.9687164702434309,
|
|
"calibration/buffer_entropy_10bins": 0.964346395839352,
|
|
"calibration/buffer_entropy_50bins": 0.9727008787721898,
|
|
"calibration/confidence_entropy": 0.48253928652287603,
|
|
"calibration/coverage@0%": 0.11276037364275815,
|
|
"calibration/coverage@1%": 0.13155932925633518,
|
|
"calibration/coverage@10%": 0.5150240243089297,
|
|
"calibration/coverage@15%": 0.5969002751000211,
|
|
"calibration/coverage@20%": 0.6727406402156719,
|
|
"calibration/coverage@25%": 0.760355493197759,
|
|
"calibration/coverage@30%": 0.8286161138459842,
|
|
"calibration/coverage@5%": 0.41456603762425204,
|
|
"calibration/ece": 0.1701717909819578,
|
|
"calibration/mean_confidence": 0.5407732633406844,
|
|
"calibration/prompt_uniqueness": 0.8612606071323716,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.016319444444444442,
|
|
"completions/max_length": 3997.8,
|
|
"completions/max_terminated_length": 3997.8,
|
|
"completions/mean_length": 1132.7577880859376,
|
|
"completions/mean_terminated_length": 1151.5529541015626,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 325.6,
|
|
"epoch": 0.3599955000562493,
|
|
"grad_norm": 0.00029174465453252196,
|
|
"learning_rate": 1.7469879518072292e-06,
|
|
"loss": -0.0143,
|
|
"num_tokens": 383912988.0,
|
|
"reward": 0.9861086249351502,
|
|
"reward_std": 0.14031601548194886,
|
|
"rewards/accuracy_reward": 0.6947916626930237,
|
|
"rewards/brier_reward": 0.8056597113609314,
|
|
"rewards/confidence_uniqueness_reward": 0.9327221870422363,
|
|
"rewards/format_reward": 0.9835069417953491,
|
|
"rewards/frontier_aurc_reward": -0.0011077008675783873,
|
|
"rewards/frontier_coverage_0": 0.01600627228617668,
|
|
"rewards/frontier_coverage_1": 0.01600627228617668,
|
|
"rewards/frontier_coverage_10": 0.01600627228617668,
|
|
"rewards/frontier_coverage_15": 0.01600627228617668,
|
|
"rewards/frontier_coverage_20": 0.019521726109087468,
|
|
"rewards/frontier_coverage_25": 0.04305166006088257,
|
|
"rewards/frontier_coverage_5": 0.01600627228617668,
|
|
"rewards/frontier_ece_reward": 0.006581637542694807,
|
|
"rewards/frontier_entropy_batch_reward": -0.2930577486753464,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1642144054174423,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.2013888888888889,
|
|
"signal/accuracy_reward/group_std_mean": 0.21578683853149414,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08210720270872116,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08210720270872116,
|
|
"signal/advantage_abs_mean": 0.10177487134933472,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10177487134933472,
|
|
"signal/advantage_pre_scale_std": 0.16584074795246123,
|
|
"signal/advantage_std": 0.16584074795246123,
|
|
"signal/brier_reward/centered_abs_mean": 0.1371104210615158,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8177083333333333,
|
|
"signal/brier_reward/group_std_mean": 0.18097830712795257,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013711042888462543,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013711042888462543,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03890909440815449,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7947916666666666,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0675606332719326,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003890909440815449,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003890909440815449,
|
|
"signal/format_reward/centered_abs_mean": 0.02860243022441864,
|
|
"signal/format_reward/group_bin_occupancy": 0.1545138888888889,
|
|
"signal/format_reward/group_std_mean": 0.055647566169500354,
|
|
"signal/format_reward/group_zero_std_frac": 0.7638889074325561,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01430121511220932,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01430121511220932,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015429736115038395,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6982638888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002718376787379384,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9287169561721384e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9287169561721384e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18298667073249816,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8253472222222221,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24271575808525087,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002287333458662033,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002287333458662033,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18298667073249816,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8253472222222221,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24271575808525087,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002287333458662033,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002287333458662033,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18298667073249816,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8253472222222221,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24271575808525087,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002287333458662033,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002287333458662033,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18298667073249816,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8253472222222221,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24271575808525087,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002287333458662033,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002287333458662033,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1552409678697586,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8149305555555555,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20808847844600678,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019405121915042401,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019405121915042401,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07173716872930527,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8854166666666666,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0958094283938408,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008967146510258317,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008967146510258317,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18298667073249816,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8253472222222221,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24271575808525087,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002287333458662033,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002287333458662033,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02298327349126339,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7138888888888888,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.028938150405883788,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022983273956924677,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022983273956924677,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3349026620388031,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7590277777777777,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4039584219455719,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033490267023444174,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033490267023444174,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.3599955000562493,
|
|
"eval_calibration/aurc": 0.14166378509288816,
|
|
"eval_calibration/batch_distribution_entropy": 0.8977435367759877,
|
|
"eval_calibration/batch_entropy_100bins": 0.6974634442420786,
|
|
"eval_calibration/batch_entropy_10bins": 0.8977435367759877,
|
|
"eval_calibration/batch_entropy_50bins": 0.7715932785376074,
|
|
"eval_calibration/batch_uniqueness": 0.8898039151925078,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9691951475996244,
|
|
"eval_calibration/buffer_entropy_100bins": 0.974236048816972,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9691951475996244,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9768814779586684,
|
|
"eval_calibration/confidence_entropy": 0.49954160875782266,
|
|
"eval_calibration/coverage@0%": 0.235383064516129,
|
|
"eval_calibration/coverage@1%": 0.235383064516129,
|
|
"eval_calibration/coverage@10%": 0.40305779569892475,
|
|
"eval_calibration/coverage@15%": 0.5608198924731183,
|
|
"eval_calibration/coverage@20%": 0.766633064516129,
|
|
"eval_calibration/coverage@25%": 0.9040658602150536,
|
|
"eval_calibration/coverage@30%": 0.9786626344086021,
|
|
"eval_calibration/coverage@5%": 0.235383064516129,
|
|
"eval_calibration/ece": 0.17516353628706025,
|
|
"eval_calibration/mean_confidence": 0.5908915400357312,
|
|
"eval_calibration/prompt_uniqueness": 0.8898039151925078,
|
|
"eval_completions/clipped_ratio": 0.016493055555555563,
|
|
"eval_completions/max_length": 3125.1666666666665,
|
|
"eval_completions/max_terminated_length": 3125.1666666666665,
|
|
"eval_completions/mean_length": 1075.0045369466145,
|
|
"eval_completions/mean_terminated_length": 1092.6835530598958,
|
|
"eval_completions/min_length": 76.0,
|
|
"eval_completions/min_terminated_length": 414.1666666666667,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 383912988.0,
|
|
"eval_reward": 0.9052900274594625,
|
|
"eval_reward_std": 0.2462936962644259,
|
|
"eval_rewards/accuracy_reward": 0.6831597089767456,
|
|
"eval_rewards/brier_reward": 0.8052956362565359,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8782645761966705,
|
|
"eval_rewards/format_reward": 0.9800347288449606,
|
|
"eval_rewards/frontier_aurc_reward": -0.0012432806252036244,
|
|
"eval_rewards/frontier_coverage_0": 0.024572810948787566,
|
|
"eval_rewards/frontier_coverage_1": 0.024572810948787566,
|
|
"eval_rewards/frontier_coverage_10": 0.024572810948787566,
|
|
"eval_rewards/frontier_coverage_15": 0.024731364154528517,
|
|
"eval_rewards/frontier_coverage_20": 0.0300130230995516,
|
|
"eval_rewards/frontier_coverage_25": 0.0688376184552908,
|
|
"eval_rewards/frontier_coverage_5": 0.024572810948787566,
|
|
"eval_rewards/frontier_ece_reward": 0.005823705461807549,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9800347288449606,
|
|
"eval_runtime": 215.2066,
|
|
"eval_samples_per_second": 4.647,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4214952240387599,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.46546245117982227,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21074761201937994,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21074761201937994,
|
|
"eval_signal/advantage_abs_mean": 0.20943692326545715,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20943692326545715,
|
|
"eval_signal/advantage_pre_scale_std": 0.2453278973698616,
|
|
"eval_signal/advantage_std": 0.2453278973698616,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.18495392551024756,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.84375,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2431354746222496,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018495393606523674,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.018495393606523674,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.061455123126506805,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3611111111111111,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.10940167804559071,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0061455123747388525,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0061455123747388525,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.03781467008714875,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.18055555555555555,
|
|
"eval_signal/format_reward/group_std_mean": 0.09148847094426553,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.5555555696288744,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.018907335043574374,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.018907335043574374,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0021643370661574104,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.611111111111111,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004351850405024986,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7054214115196373e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7054214115196373e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.25391770899295807,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9236111111111112,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.3627063085635503,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0031739713546509543,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0031739713546509543,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.25391770899295807,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9236111111111112,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.3627063085635503,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031739713546509543,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031739713546509543,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.25391770899295807,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9236111111111112,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.3627063085635503,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031739713546509543,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031739713546509543,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.2522597908973694,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9201388888888888,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.3606320917606354,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031532473706950745,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031532473706950745,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.15832207848628363,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8854166666666666,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.24005423734585443,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001979026031525185,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001979026031525185,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0908020759622256,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9479166666666669,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.11604747300346692,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011350259883329272,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011350259883329272,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.25391770899295807,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9236111111111112,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.3627063085635503,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031739713546509543,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031739713546509543,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.026654658528665703,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8993055555555555,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.03542460377017657,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0026654657752563557,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0026654657752563557,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.03781467008714875,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.18055555555555555,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.09148847094426553,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.5555555696288744,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0037814672493065395,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0037814672493065395,
|
|
"eval_steps_per_second": 0.028,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1589188574324561,
|
|
"calibration/batch_distribution_entropy": 0.9682778490933727,
|
|
"calibration/batch_entropy_100bins": 0.9573909052133722,
|
|
"calibration/batch_entropy_10bins": 0.9682778490933727,
|
|
"calibration/batch_entropy_50bins": 0.966160103078671,
|
|
"calibration/batch_uniqueness": 0.9501737534173381,
|
|
"calibration/buffer_distribution_entropy": 0.9715026240037516,
|
|
"calibration/buffer_entropy_100bins": 0.9773167131994699,
|
|
"calibration/buffer_entropy_10bins": 0.9715026240037516,
|
|
"calibration/buffer_entropy_50bins": 0.9790737695937416,
|
|
"calibration/confidence_entropy": 0.5003052797857543,
|
|
"calibration/coverage@0%": 0.016352829482242193,
|
|
"calibration/coverage@1%": 0.016352829482242193,
|
|
"calibration/coverage@10%": 0.48774055947796324,
|
|
"calibration/coverage@15%": 0.5995304254029382,
|
|
"calibration/coverage@20%": 0.6832017543859649,
|
|
"calibration/coverage@25%": 0.8128728070175437,
|
|
"calibration/coverage@30%": 0.8958662280701754,
|
|
"calibration/coverage@5%": 0.059086569319640556,
|
|
"calibration/ece": 0.17449626019352468,
|
|
"calibration/mean_confidence": 0.5850255241339524,
|
|
"calibration/prompt_uniqueness": 0.8585340912346615,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.013802083333333326,
|
|
"completions/max_length": 3833.6,
|
|
"completions/max_terminated_length": 3833.6,
|
|
"completions/mean_length": 1044.0293334960938,
|
|
"completions/mean_terminated_length": 1058.7995727539062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 336.8,
|
|
"epoch": 0.3719953500581243,
|
|
"grad_norm": 0.0002921310951933265,
|
|
"learning_rate": 1.5963855421686747e-06,
|
|
"loss": -0.0106,
|
|
"num_tokens": 399047918.0,
|
|
"reward": 1.010855793952942,
|
|
"reward_std": 0.13225770443677903,
|
|
"rewards/accuracy_reward": 0.7420138955116272,
|
|
"rewards/brier_reward": 0.8184032678604126,
|
|
"rewards/confidence_uniqueness_reward": 0.9341031432151794,
|
|
"rewards/format_reward": 0.9859375,
|
|
"rewards/frontier_aurc_reward": -0.0009835207951255144,
|
|
"rewards/frontier_coverage_0": -0.003601994086056948,
|
|
"rewards/frontier_coverage_1": -0.003601994086056948,
|
|
"rewards/frontier_coverage_10": -0.003601994086056948,
|
|
"rewards/frontier_coverage_15": 0.0005078878486528993,
|
|
"rewards/frontier_coverage_20": 0.021516397967934607,
|
|
"rewards/frontier_coverage_25": 0.09996354579925537,
|
|
"rewards/frontier_coverage_5": -0.003601994086056948,
|
|
"rewards/frontier_ece_reward": 0.002238374725857284,
|
|
"rewards/frontier_entropy_batch_reward": -0.2992683291435242,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15916883647441865,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19791666666666666,
|
|
"signal/accuracy_reward/group_std_mean": 0.20814797878265381,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07958441823720933,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07958441823720933,
|
|
"signal/advantage_abs_mean": 0.09818726927042007,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09818726927042007,
|
|
"signal/advantage_pre_scale_std": 0.1613670289516449,
|
|
"signal/advantage_std": 0.1613670289516449,
|
|
"signal/brier_reward/centered_abs_mean": 0.12828820943832397,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8326388888888889,
|
|
"signal/brier_reward/group_std_mean": 0.16683202385902404,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012828820943832397,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012828820943832397,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03536626324057579,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8340277777777778,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05673680827021599,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003536626137793064,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003536626137793064,
|
|
"signal/format_reward/centered_abs_mean": 0.02351345494389534,
|
|
"signal/format_reward/group_bin_occupancy": 0.14583333333333334,
|
|
"signal/format_reward/group_std_mean": 0.04251343086361885,
|
|
"signal/format_reward/group_zero_std_frac": 0.8333333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01175672747194767,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01175672747194767,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015422179130837321,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.717013888888889,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0027723016683012247,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.927772464114241e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.927772464114241e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16945272982120513,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8222222222222222,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22849854230880737,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00211815913207829,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00211815913207829,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16945272982120513,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8222222222222222,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22849854230880737,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00211815913207829,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00211815913207829,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16945272982120513,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8222222222222222,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22849854230880737,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00211815913207829,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00211815913207829,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15546642541885375,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8138888888888888,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2106780767440796,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019433303037658333,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019433303037658333,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08221425265073776,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8430555555555556,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11387116461992264,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010276781744323672,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010276781744323672,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07415155619382859,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9208333333333334,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09463738054037094,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009268944384530186,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009268944384530186,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16945272982120513,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8222222222222222,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22849854230880737,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00211815913207829,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00211815913207829,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.019297819957137106,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7003472222222222,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02474020905792713,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019297819584608079,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019297819584608079,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3254176914691925,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7572916666666667,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3945078909397125,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03254176788032055,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03254176788032055,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.13216890708804582,
|
|
"calibration/batch_distribution_entropy": 0.9391698914560569,
|
|
"calibration/batch_entropy_100bins": 0.9427522555652142,
|
|
"calibration/batch_entropy_10bins": 0.9391698914560569,
|
|
"calibration/batch_entropy_50bins": 0.9494372446380505,
|
|
"calibration/batch_uniqueness": 0.9452771036630393,
|
|
"calibration/buffer_distribution_entropy": 0.9759505965841125,
|
|
"calibration/buffer_entropy_100bins": 0.9831831344351396,
|
|
"calibration/buffer_entropy_10bins": 0.9759505965841125,
|
|
"calibration/buffer_entropy_50bins": 0.9833908527361022,
|
|
"calibration/confidence_entropy": 0.48900815017571675,
|
|
"calibration/coverage@0%": 0.06475746949854295,
|
|
"calibration/coverage@1%": 0.08512300474658473,
|
|
"calibration/coverage@10%": 0.6281017851935031,
|
|
"calibration/coverage@15%": 0.7311611629993731,
|
|
"calibration/coverage@20%": 0.7874305476139628,
|
|
"calibration/coverage@25%": 0.8517333333333333,
|
|
"calibration/coverage@30%": 0.8879999999999999,
|
|
"calibration/coverage@5%": 0.4113532997891937,
|
|
"calibration/ece": 0.1564265309065293,
|
|
"calibration/mean_confidence": 0.6150749953655152,
|
|
"calibration/prompt_uniqueness": 0.8499751975720826,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.016927083333333325,
|
|
"completions/max_length": 3559.4,
|
|
"completions/max_terminated_length": 3559.4,
|
|
"completions/mean_length": 1031.0409912109376,
|
|
"completions/mean_terminated_length": 1048.8473876953126,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 324.4,
|
|
"epoch": 0.38399520005999926,
|
|
"grad_norm": 0.0002907900488935411,
|
|
"learning_rate": 1.4457831325301204e-06,
|
|
"loss": -0.0123,
|
|
"num_tokens": 414012806.0,
|
|
"reward": 0.9759169220924377,
|
|
"reward_std": 0.1336510330438614,
|
|
"rewards/accuracy_reward": 0.6717013835906982,
|
|
"rewards/brier_reward": 0.8032342672348023,
|
|
"rewards/confidence_uniqueness_reward": 0.9325896739959717,
|
|
"rewards/format_reward": 0.982899296283722,
|
|
"rewards/frontier_aurc_reward": -0.001580100622959435,
|
|
"rewards/frontier_coverage_0": 0.02686268715187907,
|
|
"rewards/frontier_coverage_1": 0.02686268715187907,
|
|
"rewards/frontier_coverage_10": 0.02686268715187907,
|
|
"rewards/frontier_coverage_15": 0.02580845048651099,
|
|
"rewards/frontier_coverage_20": 0.0352854423224926,
|
|
"rewards/frontier_coverage_25": 0.1048676148056984,
|
|
"rewards/frontier_coverage_5": 0.02686268715187907,
|
|
"rewards/frontier_ece_reward": 0.0036205228650942447,
|
|
"rewards/frontier_entropy_batch_reward": -0.287257993221283,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15773654282093047,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1951388888888889,
|
|
"signal/accuracy_reward/group_std_mean": 0.20291894674301147,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.43888888955116273,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07886827141046523,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07886827141046523,
|
|
"signal/advantage_abs_mean": 0.0999557062983513,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0999557062983513,
|
|
"signal/advantage_pre_scale_std": 0.1613948255777359,
|
|
"signal/advantage_std": 0.1613948255777359,
|
|
"signal/brier_reward/centered_abs_mean": 0.13456721603870392,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8225694444444445,
|
|
"signal/brier_reward/group_std_mean": 0.17532566785812378,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013456722162663937,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013456722162663937,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03702561557292938,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8114583333333334,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.062202471494674685,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003702561743557453,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003702561743557453,
|
|
"signal/format_reward/centered_abs_mean": 0.02643771693110466,
|
|
"signal/format_reward/group_bin_occupancy": 0.15069444444444444,
|
|
"signal/format_reward/group_std_mean": 0.04977613650262356,
|
|
"signal/format_reward/group_zero_std_frac": 0.794444453716278,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01321885846555233,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01321885846555233,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001971296383999288,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7038194444444443,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035364361479878425,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4641206255182625e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4641206255182625e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18115276098251343,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8267361111111111,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.237866547703743,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002264409465715289,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002264409465715289,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18115276098251343,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8267361111111111,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.237866547703743,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002264409465715289,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002264409465715289,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18115276098251343,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8267361111111111,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.237866547703743,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002264409465715289,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002264409465715289,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1428930014371872,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8180555555555555,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1900169789791107,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017861625878140331,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017861625878140331,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06546353325247764,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08640649169683456,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008182941586710512,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008182941586710512,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08405493348836898,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9111111111111111,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10799293369054794,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010506867663934826,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010506867663934826,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18115276098251343,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8267361111111111,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.237866547703743,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002264409465715289,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002264409465715289,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.019202812016010283,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7010416666666667,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02441619634628296,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019202813040465117,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019202813040465117,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32310463190078736,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7458333333333333,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3927301824092865,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03231046348810196,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03231046348810196,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.15014929418501763,
|
|
"calibration/batch_distribution_entropy": 0.9648822577428188,
|
|
"calibration/batch_entropy_100bins": 0.9536192526711391,
|
|
"calibration/batch_entropy_10bins": 0.9648822577428188,
|
|
"calibration/batch_entropy_50bins": 0.9641713027389625,
|
|
"calibration/batch_uniqueness": 0.9491115920985426,
|
|
"calibration/buffer_distribution_entropy": 0.9799014261000714,
|
|
"calibration/buffer_entropy_100bins": 0.9880403664396292,
|
|
"calibration/buffer_entropy_10bins": 0.9799014261000714,
|
|
"calibration/buffer_entropy_50bins": 0.9871609349048261,
|
|
"calibration/confidence_entropy": 0.47602209908381665,
|
|
"calibration/coverage@0%": 0.04914401400644559,
|
|
"calibration/coverage@1%": 0.17886424796002687,
|
|
"calibration/coverage@10%": 0.4479380671321317,
|
|
"calibration/coverage@15%": 0.5937473627521253,
|
|
"calibration/coverage@20%": 0.6905573444066685,
|
|
"calibration/coverage@25%": 0.7322448470073357,
|
|
"calibration/coverage@30%": 0.8138341728899103,
|
|
"calibration/coverage@5%": 0.38675869118806394,
|
|
"calibration/ece": 0.19224718889496667,
|
|
"calibration/mean_confidence": 0.5294937305316039,
|
|
"calibration/prompt_uniqueness": 0.8480453952249729,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015451388888888884,
|
|
"completions/max_length": 3866.6,
|
|
"completions/max_terminated_length": 3866.6,
|
|
"completions/mean_length": 1037.01435546875,
|
|
"completions/mean_terminated_length": 1053.4653564453124,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 347.4,
|
|
"epoch": 0.39599505006187424,
|
|
"grad_norm": 0.0003302933764643967,
|
|
"learning_rate": 1.2951807228915664e-06,
|
|
"loss": -0.011,
|
|
"num_tokens": 429098283.0,
|
|
"reward": 0.9782995223999024,
|
|
"reward_std": 0.12833615243434907,
|
|
"rewards/accuracy_reward": 0.6689236164093018,
|
|
"rewards/brier_reward": 0.7952205181121826,
|
|
"rewards/confidence_uniqueness_reward": 0.9354442358016968,
|
|
"rewards/format_reward": 0.9842013835906982,
|
|
"rewards/frontier_aurc_reward": -0.0011664081714116037,
|
|
"rewards/frontier_coverage_0": 0.028777531534433364,
|
|
"rewards/frontier_coverage_1": 0.028777531534433364,
|
|
"rewards/frontier_coverage_10": 0.028777531534433364,
|
|
"rewards/frontier_coverage_15": 0.03406037017703056,
|
|
"rewards/frontier_coverage_20": 0.047785230726003644,
|
|
"rewards/frontier_coverage_25": 0.11350053399801255,
|
|
"rewards/frontier_coverage_5": 0.028777531534433364,
|
|
"rewards/frontier_ece_reward": 0.0018203054147306829,
|
|
"rewards/frontier_entropy_batch_reward": -0.2537760019302368,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1449761286377907,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19791666666666666,
|
|
"signal/accuracy_reward/group_std_mean": 0.19713898301124572,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4166666746139526,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07248806431889535,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07248806431889535,
|
|
"signal/advantage_abs_mean": 0.0937883585691452,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0937883585691452,
|
|
"signal/advantage_pre_scale_std": 0.15323749482631682,
|
|
"signal/advantage_std": 0.15323749482631682,
|
|
"signal/brier_reward/centered_abs_mean": 0.13555509746074676,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8256944444444445,
|
|
"signal/brier_reward/group_std_mean": 0.1763432115316391,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013555509969592094,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013555509969592094,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.035801272839307785,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8256944444444445,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.057362791150808334,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035801273304969074,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035801273304969074,
|
|
"signal/format_reward/centered_abs_mean": 0.025390624813735486,
|
|
"signal/format_reward/group_bin_occupancy": 0.1472222222222222,
|
|
"signal/format_reward/group_std_mean": 0.0450144499540329,
|
|
"signal/format_reward/group_zero_std_frac": 0.8222222447395324,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012695312406867743,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.012695312406867743,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013781745452433824,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002441568742506206,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7227181888301857e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7227181888301857e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19617225527763366,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8229166666666666,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.25922334790229795,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002452153339982033,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002452153339982033,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19617225527763366,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8229166666666666,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.25922334790229795,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002452153339982033,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002452153339982033,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19617225527763366,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8229166666666666,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.25922334790229795,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002452153339982033,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002452153339982033,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13480380773544312,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8225694444444445,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17995196878910064,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016850476153194905,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016850476153194905,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06469424068927765,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9013888888888889,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08368255645036697,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008086780086159706,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008086780086159706,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08406549245119095,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8961805555555555,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10824144333600998,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010508187115192413,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010508187115192413,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19617225527763366,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8229166666666666,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.25922334790229795,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002452153339982033,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002452153339982033,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.019796424731612205,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6861111111111111,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.025375865027308465,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001979642570950091,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001979642570950091,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3119848847389221,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7420138888888889,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38362287282943724,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031198487058281897,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031198487058281897,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.10765388586400701,
|
|
"calibration/batch_distribution_entropy": 0.9201008680078046,
|
|
"calibration/batch_entropy_100bins": 0.9304532773423361,
|
|
"calibration/batch_entropy_10bins": 0.9201008680078046,
|
|
"calibration/batch_entropy_50bins": 0.9365333991429792,
|
|
"calibration/batch_uniqueness": 0.9396124919073392,
|
|
"calibration/buffer_distribution_entropy": 0.9826422244857502,
|
|
"calibration/buffer_entropy_100bins": 0.9905602665567612,
|
|
"calibration/buffer_entropy_10bins": 0.9826422244857502,
|
|
"calibration/buffer_entropy_50bins": 0.9892516637227805,
|
|
"calibration/confidence_entropy": 0.45945668982066856,
|
|
"calibration/coverage@0%": 0.02905259854202034,
|
|
"calibration/coverage@1%": 0.02905259854202034,
|
|
"calibration/coverage@10%": 0.6277144528296503,
|
|
"calibration/coverage@15%": 0.8055394667389475,
|
|
"calibration/coverage@20%": 0.901696220950968,
|
|
"calibration/coverage@25%": 0.9473329814400046,
|
|
"calibration/coverage@30%": 0.9798907228554719,
|
|
"calibration/coverage@5%": 0.18769134199751772,
|
|
"calibration/ece": 0.11805985067322597,
|
|
"calibration/mean_confidence": 0.6558299405476236,
|
|
"calibration/prompt_uniqueness": 0.8448339007235921,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014409722222222232,
|
|
"completions/max_length": 3814.6,
|
|
"completions/max_terminated_length": 3814.6,
|
|
"completions/mean_length": 956.7868041992188,
|
|
"completions/mean_terminated_length": 970.8463134765625,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 320.4,
|
|
"epoch": 0.4079949000637492,
|
|
"grad_norm": 0.00033093104138970375,
|
|
"learning_rate": 1.1445783132530121e-06,
|
|
"loss": -0.0113,
|
|
"num_tokens": 443209651.0,
|
|
"reward": 0.9993075251579284,
|
|
"reward_std": 0.13218997418880463,
|
|
"rewards/accuracy_reward": 0.7213541626930237,
|
|
"rewards/brier_reward": 0.8155832767486573,
|
|
"rewards/confidence_uniqueness_reward": 0.9315274119377136,
|
|
"rewards/format_reward": 0.9855034708976745,
|
|
"rewards/frontier_aurc_reward": -0.0015495633939281105,
|
|
"rewards/frontier_coverage_0": 0.006550856120884419,
|
|
"rewards/frontier_coverage_1": 0.006550856120884419,
|
|
"rewards/frontier_coverage_10": 0.006582287885248661,
|
|
"rewards/frontier_coverage_15": 0.021447673067450525,
|
|
"rewards/frontier_coverage_20": 0.06830336079001427,
|
|
"rewards/frontier_coverage_25": 0.16457977890968323,
|
|
"rewards/frontier_coverage_5": 0.006550856120884419,
|
|
"rewards/frontier_ece_reward": 0.0008198617259040474,
|
|
"rewards/frontier_entropy_batch_reward": -0.32402017116546633,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.151953125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19895833333333332,
|
|
"signal/accuracy_reward/group_std_mean": 0.2033730238676071,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0759765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0759765625,
|
|
"signal/advantage_abs_mean": 0.09784262776374816,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09784262776374816,
|
|
"signal/advantage_pre_scale_std": 0.1593571901321411,
|
|
"signal/advantage_std": 0.1593571901321411,
|
|
"signal/brier_reward/centered_abs_mean": 0.13310655802488328,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8138888888888889,
|
|
"signal/brier_reward/group_std_mean": 0.1739350289106369,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013310655951499939,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013310655951499939,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03614392466843128,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8378472222222222,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05578758716583252,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036143924575299025,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036143924575299025,
|
|
"signal/format_reward/centered_abs_mean": 0.02267252579331398,
|
|
"signal/format_reward/group_bin_occupancy": 0.14444444444444443,
|
|
"signal/format_reward/group_std_mean": 0.03960092887282372,
|
|
"signal/format_reward/group_zero_std_frac": 0.8444444417953492,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01133626289665699,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01133626289665699,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021186517318710686,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6819444444444445,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003828370710834861,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.648314693942666e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.648314693942666e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1690923511981964,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22359244525432587,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021136544179171323,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021136544179171323,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1690923511981964,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22359244525432587,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021136544179171323,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021136544179171323,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1690543532371521,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2235435426235199,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021131794434040784,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021131794434040784,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10032611638307572,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8232638888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13506484925746917,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012540765106678008,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012540765106678008,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06498248800635338,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9329861111111111,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08248871117830277,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008122811093926429,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008122811093926429,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10937306880950928,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8958333333333334,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14055634438991546,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013671633554622532,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013671633554622532,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1690923511981964,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8170138888888889,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22359244525432587,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021136544179171323,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021136544179171323,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.017915211990475653,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7027777777777777,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02288214974105358,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017915211617946624,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017915211617946624,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3317939579486847,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40215290188789365,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03317939639091492,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03317939639091492,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.12350982471083516,
|
|
"calibration/batch_distribution_entropy": 0.9672992669923769,
|
|
"calibration/batch_entropy_100bins": 0.9545196973741442,
|
|
"calibration/batch_entropy_10bins": 0.9672992669923769,
|
|
"calibration/batch_entropy_50bins": 0.9654162654792131,
|
|
"calibration/batch_uniqueness": 0.9499382833178359,
|
|
"calibration/buffer_distribution_entropy": 0.982073392424633,
|
|
"calibration/buffer_entropy_100bins": 0.9903093056557356,
|
|
"calibration/buffer_entropy_10bins": 0.982073392424633,
|
|
"calibration/buffer_entropy_50bins": 0.9888724223724161,
|
|
"calibration/confidence_entropy": 0.5193928635458319,
|
|
"calibration/coverage@0%": 0.06705871595159879,
|
|
"calibration/coverage@1%": 0.06705871595159879,
|
|
"calibration/coverage@10%": 0.45455122392336095,
|
|
"calibration/coverage@15%": 0.6717985421009731,
|
|
"calibration/coverage@20%": 0.8306836402154552,
|
|
"calibration/coverage@25%": 0.9227183013732783,
|
|
"calibration/coverage@30%": 0.9658543341712893,
|
|
"calibration/coverage@5%": 0.2820532838090668,
|
|
"calibration/ece": 0.16841145912524702,
|
|
"calibration/mean_confidence": 0.5635972691055217,
|
|
"calibration/prompt_uniqueness": 0.8650919759662437,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01684027777777779,
|
|
"completions/max_length": 3941.2,
|
|
"completions/max_terminated_length": 3941.2,
|
|
"completions/mean_length": 955.89921875,
|
|
"completions/mean_terminated_length": 972.2580932617187,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 300.0,
|
|
"epoch": 0.4199947500656242,
|
|
"grad_norm": 0.00031754354131408036,
|
|
"learning_rate": 9.93975903614458e-07,
|
|
"loss": -0.014,
|
|
"num_tokens": 457329578.0,
|
|
"reward": 0.9924328446388244,
|
|
"reward_std": 0.13685493767261506,
|
|
"rewards/accuracy_reward": 0.7052083373069763,
|
|
"rewards/brier_reward": 0.8050451636314392,
|
|
"rewards/confidence_uniqueness_reward": 0.9324544668197632,
|
|
"rewards/format_reward": 0.9828993082046509,
|
|
"rewards/frontier_aurc_reward": -0.0010705198394134641,
|
|
"rewards/frontier_coverage_0": 0.00536943394690752,
|
|
"rewards/frontier_coverage_1": 0.00536943394690752,
|
|
"rewards/frontier_coverage_10": 0.0054389465600252155,
|
|
"rewards/frontier_coverage_15": 0.022105094417929648,
|
|
"rewards/frontier_coverage_20": 0.06661936640739441,
|
|
"rewards/frontier_coverage_25": 0.15077317059040068,
|
|
"rewards/frontier_coverage_5": 0.00536943394690752,
|
|
"rewards/frontier_ece_reward": -0.0005749327523517422,
|
|
"rewards/frontier_entropy_batch_reward": -0.28563171029090884,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15720486342906953,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20069444444444443,
|
|
"signal/accuracy_reward/group_std_mean": 0.21156745851039888,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3944444417953491,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07860243171453477,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07860243171453477,
|
|
"signal/advantage_abs_mean": 0.10034923404455184,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10034923404455184,
|
|
"signal/advantage_pre_scale_std": 0.16388348042964934,
|
|
"signal/advantage_std": 0.16388348042964934,
|
|
"signal/brier_reward/centered_abs_mean": 0.1324082151055336,
|
|
"signal/brier_reward/group_bin_occupancy": 0.842013888888889,
|
|
"signal/brier_reward/group_std_mean": 0.17141578793525697,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013240821473300458,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013240821473300458,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03817100264132023,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8347222222222221,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.059024860709905626,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003817100077867508,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003817100077867508,
|
|
"signal/format_reward/centered_abs_mean": 0.02693684846162796,
|
|
"signal/format_reward/group_bin_occupancy": 0.14618055555555556,
|
|
"signal/format_reward/group_std_mean": 0.04554474353790283,
|
|
"signal/format_reward/group_zero_std_frac": 0.8305555582046509,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01346842423081398,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01346842423081398,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001381588843651116,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7100694444444444,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002594554144889116,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7269860109081492e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7269860109081492e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18175126016139984,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8336805555555555,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24015596210956575,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002271890779957175,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002271890779957175,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18175126016139984,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8336805555555555,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24015596210956575,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002271890779957175,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002271890779957175,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18154462277889252,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8340277777777778,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2398768812417984,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022693077102303506,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022693077102303506,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09890482127666474,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8482638888888889,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.13202964663505554,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012363103218376637,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012363103218376637,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06385754197835922,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9204861111111111,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08191778510808945,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0007982192793861032,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007982192793861032,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10205547660589218,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.909375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13224542140960693,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012756934389472007,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012756934389472007,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18175126016139984,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8336805555555555,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24015596210956575,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002271890779957175,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002271890779957175,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01852937713265419,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.023432932049036025,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018529377412050962,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018529377412050962,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33074782490730287,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7576388888888889,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.40171239376068113,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03307478278875351,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03307478278875351,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.09333554035618889,
|
|
"calibration/batch_distribution_entropy": 0.9642334917542463,
|
|
"calibration/batch_entropy_100bins": 0.9496345841886905,
|
|
"calibration/batch_entropy_10bins": 0.9642334917542463,
|
|
"calibration/batch_entropy_50bins": 0.9616782721713711,
|
|
"calibration/batch_uniqueness": 0.9488292532918384,
|
|
"calibration/buffer_distribution_entropy": 0.9812171950001629,
|
|
"calibration/buffer_entropy_100bins": 0.9899411271146056,
|
|
"calibration/buffer_entropy_10bins": 0.9812171950001629,
|
|
"calibration/buffer_entropy_50bins": 0.9884013265090363,
|
|
"calibration/confidence_entropy": 0.5117064412428437,
|
|
"calibration/coverage@0%": 0.11779823844610977,
|
|
"calibration/coverage@1%": 0.12516665949874134,
|
|
"calibration/coverage@10%": 0.6127230239560119,
|
|
"calibration/coverage@15%": 0.8097812733762957,
|
|
"calibration/coverage@20%": 0.8916710300405335,
|
|
"calibration/coverage@25%": 0.9569934360730594,
|
|
"calibration/coverage@30%": 0.9875,
|
|
"calibration/coverage@5%": 0.3797779002231238,
|
|
"calibration/ece": 0.17150899211730591,
|
|
"calibration/mean_confidence": 0.591707829096662,
|
|
"calibration/prompt_uniqueness": 0.8579332667838413,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014149305555555537,
|
|
"completions/max_length": 3917.2,
|
|
"completions/max_terminated_length": 3917.2,
|
|
"completions/mean_length": 926.2488037109375,
|
|
"completions/mean_terminated_length": 939.5196899414062,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 312.8,
|
|
"epoch": 0.4319946000674992,
|
|
"grad_norm": 0.000292018405161798,
|
|
"learning_rate": 8.433734939759036e-07,
|
|
"loss": -0.011,
|
|
"num_tokens": 471099932.0,
|
|
"reward": 0.9934727311134338,
|
|
"reward_std": 0.13324700593948363,
|
|
"rewards/accuracy_reward": 0.704600703716278,
|
|
"rewards/brier_reward": 0.7962529778480529,
|
|
"rewards/confidence_uniqueness_reward": 0.936048150062561,
|
|
"rewards/format_reward": 0.9856770753860473,
|
|
"rewards/frontier_aurc_reward": -0.0013238670071586967,
|
|
"rewards/frontier_coverage_0": -0.0011739198584109546,
|
|
"rewards/frontier_coverage_1": -0.0011739198584109546,
|
|
"rewards/frontier_coverage_10": -0.001000142702832818,
|
|
"rewards/frontier_coverage_15": 0.020229480788111688,
|
|
"rewards/frontier_coverage_20": 0.06501827016472816,
|
|
"rewards/frontier_coverage_25": 0.14618382453918458,
|
|
"rewards/frontier_coverage_5": -0.0011739198584109546,
|
|
"rewards/frontier_ece_reward": -0.0018186770612373948,
|
|
"rewards/frontier_entropy_batch_reward": -0.2753425925970078,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15680880844593048,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1965277777777778,
|
|
"signal/accuracy_reward/group_std_mean": 0.20510988235473632,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07840440422296524,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07840440422296524,
|
|
"signal/advantage_abs_mean": 0.09849014431238175,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09849014431238175,
|
|
"signal/advantage_pre_scale_std": 0.15953330397605897,
|
|
"signal/advantage_std": 0.15953330397605897,
|
|
"signal/brier_reward/centered_abs_mean": 0.13330689668655396,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8267361111111111,
|
|
"signal/brier_reward/group_std_mean": 0.17314621210098266,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013330690003931523,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013330690003931523,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03478739969432354,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8302083333333334,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05585672035813331,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034787400159984826,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034787400159984826,
|
|
"signal/format_reward/centered_abs_mean": 0.02369249127805233,
|
|
"signal/format_reward/group_bin_occupancy": 0.14652777777777776,
|
|
"signal/format_reward/group_std_mean": 0.042657271027565,
|
|
"signal/format_reward/group_zero_std_frac": 0.8277777791023254,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011846245639026165,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.011846245639026165,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014823697507381438,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6843750000000001,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002641508309170604,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8529622684582138e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8529622684582138e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18787881731987,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8378472222222222,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24395534098148347,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023484852630645038,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023484852630645038,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18787881731987,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8378472222222222,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24395534098148347,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023484852630645038,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023484852630645038,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18747189342975618,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8371527777777779,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24344970285892487,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023433986585587262,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023433986585587262,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08262652903795242,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8614583333333332,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10986567437648773,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010328316362574696,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010328316362574696,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06542501747608184,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9225694444444444,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08352421820163727,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008178127114661038,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008178127114661038,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10522469878196716,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9090277777777779,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13572756350040435,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013153087813407183,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013153087813407183,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18787881731987,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8378472222222222,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24395534098148347,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023484852630645038,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023484852630645038,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.018781586736440658,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6805555555555556,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02334692105650902,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018781586550176144,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018781586550176144,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3329127550125122,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7572916666666667,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4022037506103516,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03329127728939056,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03329127728939056,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.17310808674125525,
|
|
"calibration/batch_distribution_entropy": 0.9739914521413097,
|
|
"calibration/batch_entropy_100bins": 0.9587801895009787,
|
|
"calibration/batch_entropy_10bins": 0.9739914521413097,
|
|
"calibration/batch_entropy_50bins": 0.9714626911723826,
|
|
"calibration/batch_uniqueness": 0.9516766269618417,
|
|
"calibration/buffer_distribution_entropy": 0.9820326552070876,
|
|
"calibration/buffer_entropy_100bins": 0.9903904724796785,
|
|
"calibration/buffer_entropy_10bins": 0.9820326552070876,
|
|
"calibration/buffer_entropy_50bins": 0.9889127545565767,
|
|
"calibration/confidence_entropy": 0.5184269578648062,
|
|
"calibration/coverage@0%": 0.009482740614904756,
|
|
"calibration/coverage@1%": 0.009482740614904756,
|
|
"calibration/coverage@10%": 0.22542069316029859,
|
|
"calibration/coverage@15%": 0.4309821279380639,
|
|
"calibration/coverage@20%": 0.8618492692076174,
|
|
"calibration/coverage@25%": 0.9274268617021277,
|
|
"calibration/coverage@30%": 0.9595744680851064,
|
|
"calibration/coverage@5%": 0.035237447087000114,
|
|
"calibration/ece": 0.22277728809670955,
|
|
"calibration/mean_confidence": 0.5523523413300608,
|
|
"calibration/prompt_uniqueness": 0.862558140679127,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.014149305555555537,
|
|
"completions/max_length": 3864.0,
|
|
"completions/max_terminated_length": 3864.0,
|
|
"completions/mean_length": 923.6966186523438,
|
|
"completions/mean_terminated_length": 937.0326904296875,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 282.8,
|
|
"epoch": 0.44399445006937416,
|
|
"grad_norm": 0.00034543531364761293,
|
|
"learning_rate": 6.927710843373495e-07,
|
|
"loss": -0.0101,
|
|
"num_tokens": 484830965.0,
|
|
"reward": 0.984534227848053,
|
|
"reward_std": 0.14124380350112914,
|
|
"rewards/accuracy_reward": 0.6821180582046509,
|
|
"rewards/brier_reward": 0.7921573638916015,
|
|
"rewards/confidence_uniqueness_reward": 0.9371705174446106,
|
|
"rewards/format_reward": 0.9855902671813965,
|
|
"rewards/frontier_aurc_reward": -0.001408666034694761,
|
|
"rewards/frontier_coverage_0": 0.009120326023548841,
|
|
"rewards/frontier_coverage_1": 0.009120326023548841,
|
|
"rewards/frontier_coverage_10": 0.009218692220747471,
|
|
"rewards/frontier_coverage_15": 0.028891825303435325,
|
|
"rewards/frontier_coverage_20": 0.06944562941789627,
|
|
"rewards/frontier_coverage_25": 0.14148974865674974,
|
|
"rewards/frontier_coverage_5": 0.009120326023548841,
|
|
"rewards/frontier_ece_reward": -0.00228926861891523,
|
|
"rewards/frontier_entropy_batch_reward": -0.25461295545101165,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17507595419883729,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20277777777777778,
|
|
"signal/accuracy_reward/group_std_mean": 0.22589021325111389,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.37777777314186095,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08753797709941864,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08753797709941864,
|
|
"signal/advantage_abs_mean": 0.10619968473911286,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10619968473911286,
|
|
"signal/advantage_pre_scale_std": 0.16439965069293977,
|
|
"signal/advantage_std": 0.16439965069293977,
|
|
"signal/brier_reward/centered_abs_mean": 0.14218833446502685,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8506944444444443,
|
|
"signal/brier_reward/group_std_mean": 0.18213841021060945,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014218833483755588,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014218833483755588,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.034088420867919925,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8260416666666666,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05641009286046028,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003408842021599412,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003408842021599412,
|
|
"signal/format_reward/centered_abs_mean": 0.02331814244389534,
|
|
"signal/format_reward/group_bin_occupancy": 0.14756944444444445,
|
|
"signal/format_reward/group_std_mean": 0.043689073622226716,
|
|
"signal/format_reward/group_zero_std_frac": 0.819444453716278,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01165907122194767,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.01165907122194767,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017288225702941419,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6871527777777777,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003313097590580583,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.161028405680554e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.161028405680554e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20681215226650237,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8489583333333334,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2656663477420807,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025851519778370856,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025851519778370856,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20681215226650237,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8489583333333334,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2656663477420807,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025851519778370856,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025851519778370856,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20644972324371338,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8489583333333334,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2652065873146057,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025806216057389975,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025806216057389975,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07617418020963669,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8944444444444445,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0993105873465538,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009521772735752165,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009521772735752165,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06728554219007492,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9090277777777779,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08726846128702163,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008410692913457751,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008410692913457751,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10936762541532516,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8864583333333333,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.143340665102005,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013670953223481775,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013670953223481775,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20681215226650237,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8489583333333334,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2656663477420807,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025851519778370856,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025851519778370856,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.019747552648186683,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.671875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.024326668307185172,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019747552461922167,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019747552461922167,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3227647304534912,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7583333333333332,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3943642437458038,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03227647431194782,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03227647431194782,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1515759443307232,
|
|
"calibration/batch_distribution_entropy": 0.9423158571273464,
|
|
"calibration/batch_entropy_100bins": 0.9422109701984105,
|
|
"calibration/batch_entropy_10bins": 0.9423158571273464,
|
|
"calibration/batch_entropy_50bins": 0.9492567887186854,
|
|
"calibration/batch_uniqueness": 0.9448453349588843,
|
|
"calibration/buffer_distribution_entropy": 0.9824175333911814,
|
|
"calibration/buffer_entropy_100bins": 0.9906260527790651,
|
|
"calibration/buffer_entropy_10bins": 0.9824175333911814,
|
|
"calibration/buffer_entropy_50bins": 0.9891563716955704,
|
|
"calibration/confidence_entropy": 0.4806348039531195,
|
|
"calibration/coverage@0%": 0.05204903964451134,
|
|
"calibration/coverage@1%": 0.05204903964451134,
|
|
"calibration/coverage@10%": 0.40964392226487145,
|
|
"calibration/coverage@15%": 0.48947900867296956,
|
|
"calibration/coverage@20%": 0.8313676437985501,
|
|
"calibration/coverage@25%": 0.9193766286216984,
|
|
"calibration/coverage@30%": 0.9560233821947192,
|
|
"calibration/coverage@5%": 0.18513113847849771,
|
|
"calibration/ece": 0.15951463187555487,
|
|
"calibration/mean_confidence": 0.6206274668368617,
|
|
"calibration/prompt_uniqueness": 0.8566573182834241,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.010850694444444465,
|
|
"completions/max_length": 3679.0,
|
|
"completions/max_terminated_length": 3679.0,
|
|
"completions/mean_length": 895.5675415039062,
|
|
"completions/mean_terminated_length": 905.3911865234375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 297.8,
|
|
"epoch": 0.45599430007124914,
|
|
"grad_norm": 0.0003213706368114799,
|
|
"learning_rate": 5.421686746987952e-07,
|
|
"loss": -0.0095,
|
|
"num_tokens": 498230847.0,
|
|
"reward": 1.0024492621421814,
|
|
"reward_std": 0.13486847281455994,
|
|
"rewards/accuracy_reward": 0.7212673664093018,
|
|
"rewards/brier_reward": 0.8124405860900878,
|
|
"rewards/confidence_uniqueness_reward": 0.9361489892005921,
|
|
"rewards/format_reward": 0.989149296283722,
|
|
"rewards/frontier_aurc_reward": -0.0017552036792039872,
|
|
"rewards/frontier_coverage_0": 0.004374879878014326,
|
|
"rewards/frontier_coverage_1": 0.004374879878014326,
|
|
"rewards/frontier_coverage_10": 0.005093986354768276,
|
|
"rewards/frontier_coverage_15": 0.038227176293730736,
|
|
"rewards/frontier_coverage_20": 0.10161207318305969,
|
|
"rewards/frontier_coverage_25": 0.19290560781955718,
|
|
"rewards/frontier_coverage_5": 0.004374879878014326,
|
|
"rewards/frontier_ece_reward": -0.002311352139804512,
|
|
"rewards/frontier_entropy_batch_reward": -0.3175202667713165,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1665961414575577,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20451388888888888,
|
|
"signal/accuracy_reward/group_std_mean": 0.22064870595932007,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3638888895511627,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.08329807072877884,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.08329807072877884,
|
|
"signal/advantage_abs_mean": 0.09963465481996536,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09963465481996536,
|
|
"signal/advantage_pre_scale_std": 0.1596238434314728,
|
|
"signal/advantage_std": 0.1596238434314728,
|
|
"signal/brier_reward/centered_abs_mean": 0.13139403611421585,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8072916666666666,
|
|
"signal/brier_reward/group_std_mean": 0.1725013792514801,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013139403238892556,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013139403238892556,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03191892094910145,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8465277777777779,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05261510461568832,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003191892057657242,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003191892057657242,
|
|
"signal/format_reward/centered_abs_mean": 0.01899414099752903,
|
|
"signal/format_reward/group_bin_occupancy": 0.14444444444444446,
|
|
"signal/format_reward/group_std_mean": 0.03723305016756058,
|
|
"signal/format_reward/group_zero_std_frac": 0.8444444417953492,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009497070498764516,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.009497070498764516,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002246159012429416,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6746527777777779,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004121129959821701,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.80769876553677e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.80769876553677e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1798312783241272,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8173611111111111,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2366260141134262,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022478910628706216,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022478910628706216,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1798312783241272,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8173611111111111,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2366260141134262,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022478910628706216,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022478910628706216,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1779997855424881,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.815625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23430375754833221,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002224997291341424,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002224997291341424,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06649869680404663,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.903125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08655229657888412,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008312337566167116,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008312337566167116,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07941063195466995,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9229166666666668,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10138371139764786,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009926329017616808,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009926329017616808,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12681428492069244,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8920138888888889,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16400834619998933,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015851786360144616,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015851786360144616,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1798312783241272,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8173611111111111,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2366260141134262,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022478910628706216,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022478910628706216,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.017472782731056215,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6562499999999999,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.021824596077203752,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017472783569246531,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017472783569246531,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34197773933410647,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4096067249774933,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034197773039340976,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034197773039340976,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18726056542504158,
|
|
"calibration/batch_distribution_entropy": 0.973440015714076,
|
|
"calibration/batch_entropy_100bins": 0.9598008556478849,
|
|
"calibration/batch_entropy_10bins": 0.973440015714076,
|
|
"calibration/batch_entropy_50bins": 0.9692972792520752,
|
|
"calibration/batch_uniqueness": 0.9511735907416039,
|
|
"calibration/buffer_distribution_entropy": 0.9823656842730131,
|
|
"calibration/buffer_entropy_100bins": 0.9906323413760862,
|
|
"calibration/buffer_entropy_10bins": 0.9823656842730131,
|
|
"calibration/buffer_entropy_50bins": 0.9891423825566555,
|
|
"calibration/confidence_entropy": 0.47507839773875,
|
|
"calibration/coverage@0%": 0.03543482683308126,
|
|
"calibration/coverage@1%": 0.03543482683308126,
|
|
"calibration/coverage@10%": 0.38248000231358287,
|
|
"calibration/coverage@15%": 0.459137786105719,
|
|
"calibration/coverage@20%": 0.5653955251403547,
|
|
"calibration/coverage@25%": 0.6885020825219144,
|
|
"calibration/coverage@30%": 0.8656397790055248,
|
|
"calibration/coverage@5%": 0.11277843696936363,
|
|
"calibration/ece": 0.18294151824470856,
|
|
"calibration/mean_confidence": 0.5592223827685562,
|
|
"calibration/prompt_uniqueness": 0.8476112297769705,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.015451388888888884,
|
|
"completions/max_length": 3607.2,
|
|
"completions/max_terminated_length": 3607.2,
|
|
"completions/mean_length": 901.8653686523437,
|
|
"completions/mean_terminated_length": 916.1808471679688,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 269.8,
|
|
"epoch": 0.46799415007312406,
|
|
"grad_norm": 0.00037715397775173187,
|
|
"learning_rate": 3.91566265060241e-07,
|
|
"loss": -0.0117,
|
|
"num_tokens": 511701200.0,
|
|
"reward": 0.9781022071838379,
|
|
"reward_std": 0.13412580341100694,
|
|
"rewards/accuracy_reward": 0.6714409708976745,
|
|
"rewards/brier_reward": 0.7976385831832886,
|
|
"rewards/confidence_uniqueness_reward": 0.9342209815979003,
|
|
"rewards/format_reward": 0.9845486044883728,
|
|
"rewards/frontier_aurc_reward": -0.00212171315215528,
|
|
"rewards/frontier_coverage_0": 0.0240963838994503,
|
|
"rewards/frontier_coverage_1": 0.0240963838994503,
|
|
"rewards/frontier_coverage_10": 0.026926378719508648,
|
|
"rewards/frontier_coverage_15": 0.04270496740937233,
|
|
"rewards/frontier_coverage_20": 0.09552509933710099,
|
|
"rewards/frontier_coverage_25": 0.17143839299678804,
|
|
"rewards/frontier_coverage_5": 0.0240984745323658,
|
|
"rewards/frontier_ece_reward": -0.0005812996299937367,
|
|
"rewards/frontier_entropy_batch_reward": -0.28104971945285795,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15088432729244233,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19965277777777776,
|
|
"signal/accuracy_reward/group_std_mean": 0.2036748230457306,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.40277777910232543,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07544216364622117,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07544216364622117,
|
|
"signal/advantage_abs_mean": 0.09861928075551987,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09861928075551987,
|
|
"signal/advantage_pre_scale_std": 0.1607095330953598,
|
|
"signal/advantage_std": 0.1607095330953598,
|
|
"signal/brier_reward/centered_abs_mean": 0.1398274078965187,
|
|
"signal/brier_reward/group_bin_occupancy": 0.821875,
|
|
"signal/brier_reward/group_std_mean": 0.18089538514614106,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01398274227976799,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01398274227976799,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03514176532626152,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8291666666666666,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05587729141116142,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003514176746830344,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003514176746830344,
|
|
"signal/format_reward/centered_abs_mean": 0.023600259982049464,
|
|
"signal/format_reward/group_bin_occupancy": 0.14583333333333334,
|
|
"signal/format_reward/group_std_mean": 0.04200470522046089,
|
|
"signal/format_reward/group_zero_std_frac": 0.8333333373069763,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011800129991024732,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.011800129991024732,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025537875946611164,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.675,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004770330665633082,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.192234580637887e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.192234580637887e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17494458258152007,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8194444444444444,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23222445845603942,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002186807314865291,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002186807314865291,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17494458258152007,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8194444444444444,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23222445845603942,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002186807314865291,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002186807314865291,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1601300060749054,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.810763888888889,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2132750004529953,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020016250899061562,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020016250899061562,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06419163271784782,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.9180555555555555,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08303456008434296,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008023954229429364,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008023954229429364,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08162481337785721,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9072916666666666,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10481662452220916,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010203101439401508,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010203101439401508,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12805333733558655,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8833333333333332,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16557440161705017,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016006667632609607,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016006667632609607,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17488285303115844,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8194444444444444,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23214463293552398,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021860357141122223,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021860357141122223,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01619000006467104,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6190972222222222,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.020337154716253282,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016189999878406525,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016189999878406525,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32077054381370546,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7475694444444445,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3914342522621155,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0320770550519228,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0320770550519228,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1413520988930143,
|
|
"calibration/batch_distribution_entropy": 0.9650686192766728,
|
|
"calibration/batch_entropy_100bins": 0.9538989692481424,
|
|
"calibration/batch_entropy_10bins": 0.9650686192766728,
|
|
"calibration/batch_entropy_50bins": 0.9638523751233203,
|
|
"calibration/batch_uniqueness": 0.9494360647400925,
|
|
"calibration/buffer_distribution_entropy": 0.982301321487669,
|
|
"calibration/buffer_entropy_100bins": 0.9905973370960849,
|
|
"calibration/buffer_entropy_10bins": 0.982301321487669,
|
|
"calibration/buffer_entropy_50bins": 0.9891011438371244,
|
|
"calibration/confidence_entropy": 0.5122134728583332,
|
|
"calibration/coverage@0%": 0.04735598198587883,
|
|
"calibration/coverage@1%": 0.04735598198587883,
|
|
"calibration/coverage@10%": 0.4334796882439303,
|
|
"calibration/coverage@15%": 0.5858704996990151,
|
|
"calibration/coverage@20%": 0.8214784767983201,
|
|
"calibration/coverage@25%": 0.9256523961484797,
|
|
"calibration/coverage@30%": 0.9576857723071821,
|
|
"calibration/coverage@5%": 0.15706616943089902,
|
|
"calibration/ece": 0.16479729043171218,
|
|
"calibration/mean_confidence": 0.5972124416701616,
|
|
"calibration/prompt_uniqueness": 0.864258559515551,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.011718749999999977,
|
|
"completions/max_length": 3774.8,
|
|
"completions/max_terminated_length": 3774.8,
|
|
"completions/mean_length": 884.886376953125,
|
|
"completions/mean_terminated_length": 895.39912109375,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 292.2,
|
|
"epoch": 0.47999400007499904,
|
|
"grad_norm": 0.0003483534383121878,
|
|
"learning_rate": 2.409638554216868e-07,
|
|
"loss": -0.0089,
|
|
"num_tokens": 524962899.0,
|
|
"reward": 0.9923826336860657,
|
|
"reward_std": 0.13313800245523452,
|
|
"rewards/accuracy_reward": 0.69296875,
|
|
"rewards/brier_reward": 0.7982598781585694,
|
|
"rewards/confidence_uniqueness_reward": 0.9397276759147644,
|
|
"rewards/format_reward": 0.9881944417953491,
|
|
"rewards/frontier_aurc_reward": -0.001622817711904645,
|
|
"rewards/frontier_coverage_0": 0.009027415048331022,
|
|
"rewards/frontier_coverage_1": 0.009027415048331022,
|
|
"rewards/frontier_coverage_10": 0.013452378194779157,
|
|
"rewards/frontier_coverage_15": 0.041050878912210466,
|
|
"rewards/frontier_coverage_20": 0.0933085709810257,
|
|
"rewards/frontier_coverage_25": 0.16813298761844636,
|
|
"rewards/frontier_coverage_5": 0.009030948393046856,
|
|
"rewards/frontier_ece_reward": -0.0034804839408025144,
|
|
"rewards/frontier_entropy_batch_reward": -0.25917285978794097,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.15110134482383727,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19999999999999998,
|
|
"signal/accuracy_reward/group_std_mean": 0.20413728952407836,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3999999940395355,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07555067241191864,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07555067241191864,
|
|
"signal/advantage_abs_mean": 0.0965758740901947,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0965758740901947,
|
|
"signal/advantage_pre_scale_std": 0.15648123919963836,
|
|
"signal/advantage_std": 0.15648123919963836,
|
|
"signal/brier_reward/centered_abs_mean": 0.13323327153921127,
|
|
"signal/brier_reward/group_bin_occupancy": 0.836111111111111,
|
|
"signal/brier_reward/group_std_mean": 0.1752035677433014,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013323327712714671,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013323327712714671,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030282768607139587,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8295138888888889,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.053694164752960204,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003028276888653636,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003028276888653636,
|
|
"signal/format_reward/centered_abs_mean": 0.02018229179084301,
|
|
"signal/format_reward/group_bin_occupancy": 0.1486111111111111,
|
|
"signal/format_reward/group_std_mean": 0.04187272489070892,
|
|
"signal/format_reward/group_zero_std_frac": 0.8111111164093018,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010091145895421505,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010091145895421505,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018836703849956394,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6753472222222222,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0034654059447348116,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3545879957964645e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3545879957964645e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17963458895683287,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8232638888888888,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23895832598209382,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002245432324707508,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002245432324707508,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17963458895683287,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8232638888888888,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23895832598209382,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002245432324707508,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002245432324707508,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15076417326927186,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.820138888888889,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2017397791147232,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018845521612092853,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018845521612092853,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.060654813051223756,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.91875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.07899993509054185,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007581851677969098,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007581851677969098,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07843978106975555,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9059027777777778,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10048370212316513,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009804973145946861,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009804973145946861,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12252330780029297,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8850694444444442,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1579478919506073,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015315414173528552,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015315414173528552,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1795719563961029,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8232638888888888,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2388751685619354,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022446493152529,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022446493152529,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.016645903512835503,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.5958333333333333,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.020757382735610008,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016645904397591949,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016645904397591949,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31962995529174804,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.763888888888889,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3887501060962677,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031962993741035464,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031962993741035464,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.47999400007499904,
|
|
"eval_calibration/aurc": 0.144239155861395,
|
|
"eval_calibration/batch_distribution_entropy": 0.9221909700103857,
|
|
"eval_calibration/batch_entropy_100bins": 0.7062575776226812,
|
|
"eval_calibration/batch_entropy_10bins": 0.9221909700103857,
|
|
"eval_calibration/batch_entropy_50bins": 0.7956526029853941,
|
|
"eval_calibration/batch_uniqueness": 0.8993341113616218,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9820939519133489,
|
|
"eval_calibration/buffer_entropy_100bins": 0.990473516652059,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9820939519133489,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9889406860438233,
|
|
"eval_calibration/confidence_entropy": 0.5107617812049482,
|
|
"eval_calibration/coverage@0%": 0.30532034050179213,
|
|
"eval_calibration/coverage@1%": 0.30532034050179213,
|
|
"eval_calibration/coverage@10%": 0.5592517921146953,
|
|
"eval_calibration/coverage@15%": 0.6490031362007169,
|
|
"eval_calibration/coverage@20%": 0.7074932795698925,
|
|
"eval_calibration/coverage@25%": 0.9164986559139785,
|
|
"eval_calibration/coverage@30%": 0.9375,
|
|
"eval_calibration/coverage@5%": 0.33309811827956987,
|
|
"eval_calibration/ece": 0.25243557157011315,
|
|
"eval_calibration/mean_confidence": 0.544805507687573,
|
|
"eval_calibration/prompt_uniqueness": 0.8993341113616218,
|
|
"eval_completions/clipped_ratio": 0.01128472222222221,
|
|
"eval_completions/max_length": 2559.5,
|
|
"eval_completions/max_terminated_length": 2559.5,
|
|
"eval_completions/mean_length": 878.668935139974,
|
|
"eval_completions/mean_terminated_length": 888.665771484375,
|
|
"eval_completions/min_length": 66.83333333333333,
|
|
"eval_completions/min_terminated_length": 338.1666666666667,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 524962899.0,
|
|
"eval_reward": 0.9085456728935242,
|
|
"eval_reward_std": 0.236699769894282,
|
|
"eval_rewards/accuracy_reward": 0.6822916666666666,
|
|
"eval_rewards/brier_reward": 0.7939638197422028,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8837526738643646,
|
|
"eval_rewards/format_reward": 0.9887152711550394,
|
|
"eval_rewards/frontier_aurc_reward": -0.0017724030088478078,
|
|
"eval_rewards/frontier_coverage_0": 0.016208822838962078,
|
|
"eval_rewards/frontier_coverage_1": 0.016208822838962078,
|
|
"eval_rewards/frontier_coverage_10": 0.01999556540007082,
|
|
"eval_rewards/frontier_coverage_15": 0.04217005521059036,
|
|
"eval_rewards/frontier_coverage_20": 0.08990584810574849,
|
|
"eval_rewards/frontier_coverage_25": 0.15949934472640356,
|
|
"eval_rewards/frontier_coverage_5": 0.016208509060864646,
|
|
"eval_rewards/frontier_ece_reward": -0.0033823695460644863,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.9887152711550394,
|
|
"eval_runtime": 192.0973,
|
|
"eval_samples_per_second": 5.206,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4200303753217061,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.46436455845832825,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21001518766085306,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21001518766085306,
|
|
"eval_signal/advantage_abs_mean": 0.2051889697710673,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2051889697710673,
|
|
"eval_signal/advantage_pre_scale_std": 0.2356510510047277,
|
|
"eval_signal/advantage_std": 0.2356510510047277,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.18867906431357065,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.8437500000000001,
|
|
"eval_signal/brier_reward/group_std_mean": 0.24486981829007468,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018867906493445236,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.018867906493445236,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.053733758007486664,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.39236111111111116,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.08356440626084805,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005373376111189525,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005373376111189525,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.021538628575702507,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.1597222222222222,
|
|
"eval_signal/format_reward/group_std_mean": 0.05486780156691869,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.722222238779068,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010769314287851254,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.010769314287851254,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0029052970154831805,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.5902777777777777,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.006144408291826646,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.63162131786036e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.63162131786036e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.2968921313683192,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9166666666666666,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.40362854798634845,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003711151541210711,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003711151541210711,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.2968921313683192,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9166666666666666,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.40362854798634845,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003711151541210711,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003711151541210711,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.23208573708931604,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9236111111111112,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.32233305275440216,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029010717601825795,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029010717601825795,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.07928842430313428,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9236111111111112,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.10499530161420505,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0009911053445345412,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009911053445345412,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.11974722519516945,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9236111111111112,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.15377535670995712,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014968403847888112,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014968403847888112,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.21281012147665024,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9201388888888888,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.2633419682582219,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002660126502936085,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002660126502936085,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.29676895836989087,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9166666666666666,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.40348292887210846,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037096117545540133,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037096117545540133,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.026015580942233402,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.763888888888889,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.03171707410365343,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0026015581485504904,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0026015581485504904,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.021538628575702507,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.1597222222222222,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.05486780156691869,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.722222238779068,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.002153862966224551,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002153862966224551,
|
|
"eval_steps_per_second": 0.031,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.15963940113709016,
|
|
"calibration/batch_distribution_entropy": 0.9623404892352694,
|
|
"calibration/batch_entropy_100bins": 0.9498760133778437,
|
|
"calibration/batch_entropy_10bins": 0.9623404892352694,
|
|
"calibration/batch_entropy_50bins": 0.9618336723769684,
|
|
"calibration/batch_uniqueness": 0.9492106549711471,
|
|
"calibration/buffer_distribution_entropy": 0.9825242016449753,
|
|
"calibration/buffer_entropy_100bins": 0.9906980654782693,
|
|
"calibration/buffer_entropy_10bins": 0.9825242016449753,
|
|
"calibration/buffer_entropy_50bins": 0.9891967249919829,
|
|
"calibration/confidence_entropy": 0.49405691099373455,
|
|
"calibration/coverage@0%": 0.024152440271538782,
|
|
"calibration/coverage@1%": 0.024152440271538782,
|
|
"calibration/coverage@10%": 0.2680771936187322,
|
|
"calibration/coverage@15%": 0.5601533512380341,
|
|
"calibration/coverage@20%": 0.8208879719119526,
|
|
"calibration/coverage@25%": 0.9063656280346862,
|
|
"calibration/coverage@30%": 0.9609612165574376,
|
|
"calibration/coverage@5%": 0.18524929721217218,
|
|
"calibration/ece": 0.1749253833432953,
|
|
"calibration/mean_confidence": 0.574653688117391,
|
|
"calibration/prompt_uniqueness": 0.856641489325094,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.012413194444444442,
|
|
"completions/max_length": 3715.0,
|
|
"completions/max_terminated_length": 3715.0,
|
|
"completions/mean_length": 898.568408203125,
|
|
"completions/mean_terminated_length": 909.8266723632812,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 287.6,
|
|
"epoch": 0.491993850076874,
|
|
"grad_norm": 0.000315765239065513,
|
|
"learning_rate": 9.036144578313253e-08,
|
|
"loss": -0.0096,
|
|
"num_tokens": 538380359.0,
|
|
"reward": 1.0108286142349243,
|
|
"reward_std": 0.13068382143974305,
|
|
"rewards/accuracy_reward": 0.735850703716278,
|
|
"rewards/brier_reward": 0.7879548072814941,
|
|
"rewards/confidence_uniqueness_reward": 0.9393365502357482,
|
|
"rewards/format_reward": 0.987413203716278,
|
|
"rewards/frontier_aurc_reward": -0.0011491154204122723,
|
|
"rewards/frontier_coverage_0": -0.02586173443123698,
|
|
"rewards/frontier_coverage_1": -0.02586173443123698,
|
|
"rewards/frontier_coverage_10": -0.005061741080135107,
|
|
"rewards/frontier_coverage_15": 0.04176960214972496,
|
|
"rewards/frontier_coverage_20": 0.1021927997469902,
|
|
"rewards/frontier_coverage_25": 0.1827650785446167,
|
|
"rewards/frontier_coverage_5": -0.025736397597938776,
|
|
"rewards/frontier_ece_reward": -0.007271666172891855,
|
|
"rewards/frontier_entropy_batch_reward": -0.2584350109100342,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.158447265625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.21330890357494353,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.37500000596046446,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0792236328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0792236328125,
|
|
"signal/advantage_abs_mean": 0.09550892561674118,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09550892561674118,
|
|
"signal/advantage_pre_scale_std": 0.15468985438346863,
|
|
"signal/advantage_std": 0.15468985438346863,
|
|
"signal/brier_reward/centered_abs_mean": 0.13971660435199737,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8302083333333332,
|
|
"signal/brier_reward/group_std_mean": 0.17900753021240234,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0139716612175107,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0139716612175107,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031369969993829724,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.836111111111111,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.051959720253944394,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031369972042739392,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031369972042739392,
|
|
"signal/format_reward/centered_abs_mean": 0.021185980923473836,
|
|
"signal/format_reward/group_bin_occupancy": 0.14513888888888887,
|
|
"signal/format_reward/group_std_mean": 0.03983290120959282,
|
|
"signal/format_reward/group_zero_std_frac": 0.8388888955116272,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010592990461736918,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010592990461736918,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012949521420523523,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.6989583333333333,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002273207646794617,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6186902576009744e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6186902576009744e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20164394080638887,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8111111111111112,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.26378363370895386,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025205494835972785,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025205494835972785,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20164394080638887,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8111111111111112,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26378363370895386,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025205494835972785,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025205494835972785,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1517424076795578,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.80625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19990101754665374,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018967799842357635,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018967799842357635,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06446310877799988,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.9152777777777776,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08208967447280884,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0008057888597249984,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008057888597249984,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07898852378129959,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9135416666666666,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10002039521932601,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009873565868474543,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009873565868474543,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11750572323799133,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8972222222222221,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14964151680469512,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014688215916976333,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014688215916976333,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20136131048202516,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8111111111111112,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2634296715259552,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002517016418278217,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002517016418278217,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.018270738050341608,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.5680555555555555,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.022496170178055764,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018270738422870637,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018270738422870637,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32475059032440184,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7611111111111112,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39458569288253786,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03247505947947502,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03247505947947502,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.10026530827954756,
|
|
"calibration/batch_distribution_entropy": 0.9666801209614921,
|
|
"calibration/batch_entropy_100bins": 0.9541731924906994,
|
|
"calibration/batch_entropy_10bins": 0.9666801209614921,
|
|
"calibration/batch_entropy_50bins": 0.9658220780724568,
|
|
"calibration/batch_uniqueness": 0.9500334187474572,
|
|
"calibration/buffer_distribution_entropy": 0.9828309697501236,
|
|
"calibration/buffer_entropy_100bins": 0.9908304445646857,
|
|
"calibration/buffer_entropy_10bins": 0.9828309697501236,
|
|
"calibration/buffer_entropy_50bins": 0.9893492960251346,
|
|
"calibration/confidence_entropy": 0.5110887140777053,
|
|
"calibration/coverage@0%": 0.06926601280103145,
|
|
"calibration/coverage@1%": 0.06926601280103145,
|
|
"calibration/coverage@10%": 0.5960768061887002,
|
|
"calibration/coverage@15%": 0.7677694893401483,
|
|
"calibration/coverage@20%": 0.8755652253994567,
|
|
"calibration/coverage@25%": 0.9500483492195055,
|
|
"calibration/coverage@30%": 1.0,
|
|
"calibration/coverage@5%": 0.3437007874015749,
|
|
"calibration/ece": 0.15719824569018936,
|
|
"calibration/mean_confidence": 0.5882376328465995,
|
|
"calibration/prompt_uniqueness": 0.8630131514079985,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01171875,
|
|
"completions/max_length": 3914.6666666666665,
|
|
"completions/max_terminated_length": 3914.6666666666665,
|
|
"completions/mean_length": 903.0143229166666,
|
|
"completions/mean_terminated_length": 913.7917277018229,
|
|
"completions/min_length": 0.0,
|
|
"completions/min_terminated_length": 272.6666666666667,
|
|
"epoch": 0.49919376007799904,
|
|
"num_tokens": 546488042.0,
|
|
"reward": 0.9862767457962036,
|
|
"reward_std": 0.13542574644088745,
|
|
"rewards/accuracy_reward": 0.6841724514961243,
|
|
"rewards/brier_reward": 0.7893265883127848,
|
|
"rewards/confidence_uniqueness_reward": 0.9390823642412821,
|
|
"rewards/format_reward": 0.9881365696589152,
|
|
"rewards/frontier_aurc_reward": -0.0011579099421699841,
|
|
"rewards/frontier_coverage_0": 0.0056370516152431565,
|
|
"rewards/frontier_coverage_1": 0.0056370516152431565,
|
|
"rewards/frontier_coverage_10": 0.01475714573947092,
|
|
"rewards/frontier_coverage_15": 0.043342759211858116,
|
|
"rewards/frontier_coverage_20": 0.09414516886075337,
|
|
"rewards/frontier_coverage_25": 0.16349380711714426,
|
|
"rewards/frontier_coverage_5": 0.00573221566931655,
|
|
"rewards/frontier_ece_reward": -0.0046526785008609295,
|
|
"rewards/frontier_entropy_batch_reward": -0.2639826734860738,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.156602643430233,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19849537037037038,
|
|
"signal/accuracy_reward/group_std_mean": 0.20787501335144043,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.41203704476356506,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0783013217151165,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0783013217151165,
|
|
"signal/advantage_abs_mean": 0.0979112833738327,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0979112833738327,
|
|
"signal/advantage_pre_scale_std": 0.1580300529797872,
|
|
"signal/advantage_std": 0.1580300529797872,
|
|
"signal/brier_reward/centered_abs_mean": 0.14230993390083313,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8258101851851851,
|
|
"signal/brier_reward/group_std_mean": 0.18486674626668295,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014230993886788687,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014230993886788687,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03189393070836862,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.7905092592592592,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05960730090737343,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003189393396799763,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003189393396799763,
|
|
"signal/format_reward/centered_abs_mean": 0.021647136037548382,
|
|
"signal/format_reward/group_bin_occupancy": 0.1527777777777778,
|
|
"signal/format_reward/group_std_mean": 0.04763485739628474,
|
|
"signal/format_reward/group_zero_std_frac": 0.7777777910232544,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010823568018774191,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.010823568018774191,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013959337957203388,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7077546296296297,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0026435600593686104,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7449173355998937e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7449173355998937e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20097551246484122,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8368055555555557,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.26183244585990906,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002512193905810515,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002512193905810515,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20097551246484122,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8368055555555557,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26183244585990906,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002512193905810515,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002512193905810515,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14055238167444864,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8327546296296297,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18522140880425772,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017569047631695867,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017569047631695867,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06127483397722244,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.9293981481481483,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.07892606407403946,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0007659354790424308,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0007659354790424308,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0779200370113055,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8993055555555557,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10050106793642044,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009740005092074474,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009740005092074474,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11800318956375122,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8865740740740741,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15238422652085623,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001475039830741783,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001475039830741783,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20059374471505484,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8368055555555557,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.26134761174519855,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025074218089381852,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025074218089381852,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01781535955766837,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.5752314814814815,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02214539299408595,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017815359557668369,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017815359557668369,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3191050589084625,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7662037037037037,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38939042886098224,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03191050638755163,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03191050638755163,
|
|
"step": 208,
|
|
"total_flos": 0.0,
|
|
"train_loss": -0.012413898850074755,
|
|
"train_runtime": 46320.0455,
|
|
"train_samples_per_second": 0.324,
|
|
"train_steps_per_second": 0.004
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 208,
|
|
"num_input_tokens_seen": 546488042,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 6,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|