11044 lines
695 KiB
JSON
11044 lines
695 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9984,
|
|
"eval_steps": 50,
|
|
"global_step": 312,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.6355440224731985,
|
|
"calibration/batch_distribution_entropy": 0.6404299093794114,
|
|
"calibration/batch_entropy_100bins": 0.477031643413819,
|
|
"calibration/batch_entropy_10bins": 0.6404299093794114,
|
|
"calibration/batch_entropy_50bins": 0.5591541369767675,
|
|
"calibration/batch_uniqueness": 0.7208924159407343,
|
|
"calibration/confidence_entropy": 0.34918231888339113,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.49676340149214465,
|
|
"calibration/mean_confidence": 0.7938105030524442,
|
|
"calibration/prompt_uniqueness": 0.5982426959778107,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0369140625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1485.2,
|
|
"completions/mean_length": 270.7107421875,
|
|
"completions/mean_terminated_length": 222.20957946777344,
|
|
"completions/min_length": 1.8,
|
|
"completions/min_terminated_length": 1.8,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.04414910078048706,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"loss": 0.074,
|
|
"num_tokens": 17616110.0,
|
|
"reward": 0.5323251605033874,
|
|
"reward_std": 0.41435371041297914,
|
|
"rewards/accuracy_reward": 0.2212890625,
|
|
"rewards/brier_reward": 0.37365264296531675,
|
|
"rewards/confidence_uniqueness_reward": 0.4837990701198578,
|
|
"rewards/format_reward": 0.67568359375,
|
|
"rewards/frontier_aurc_reward": 0.30136591792106626,
|
|
"rewards/frontier_coverage_0": 0.30136591792106626,
|
|
"rewards/frontier_coverage_1": 0.30136591792106626,
|
|
"rewards/frontier_coverage_10": 0.30136591792106626,
|
|
"rewards/frontier_coverage_15": 0.30136591792106626,
|
|
"rewards/frontier_coverage_20": 0.30136591792106626,
|
|
"rewards/frontier_coverage_25": 0.30136591792106626,
|
|
"rewards/frontier_coverage_5": 0.30136591792106626,
|
|
"rewards/frontier_ece_reward": 0.30136591792106626,
|
|
"rewards/frontier_entropy_batch_reward": -0.6217953085899353,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2416259765625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.2109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.2842506766319275,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.12081298828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.12081298828125,
|
|
"signal/advantage_abs_mean": 0.3522315204143524,
|
|
"signal/advantage_pre_scale_abs_mean": 0.3522315204143524,
|
|
"signal/advantage_pre_scale_std": 0.4241958498954773,
|
|
"signal/advantage_std": 0.4241958498954773,
|
|
"signal/brier_reward/centered_abs_mean": 0.3212295413017273,
|
|
"signal/brier_reward/group_bin_occupancy": 0.748828125,
|
|
"signal/brier_reward/group_std_mean": 0.36608951091766356,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03212295435369015,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03212295435369015,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.3023835599422455,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.594140625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.3513213813304901,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030238356068730356,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.030238356068730356,
|
|
"signal/format_reward/centered_abs_mean": 0.408074951171875,
|
|
"signal/format_reward/group_bin_occupancy": 0.25,
|
|
"signal/format_reward/group_std_mean": 0.45624412298202516,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2040374755859375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.2040374755859375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.293000316619873,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.65546875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3444704055786133,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.293000316619873,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.65546875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3444704055786133,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.293000316619873,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.65546875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3444704055786133,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.293000316619873,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.65546875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3444704055786133,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.293000316619873,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.65546875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3444704055786133,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.293000316619873,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.65546875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3444704055786133,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.293000316619873,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.65546875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3444704055786133,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.293000316619873,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.65546875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3444704055786133,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003662504069507122,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.293000316619873,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.65546875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3444704055786133,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.029300032556056975,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.029300032556056975,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.43757479190826415,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.380859375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4825276255607605,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.043757478892803195,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.043757478892803195,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6721645281105297,
|
|
"calibration/batch_distribution_entropy": 0.6581675725952678,
|
|
"calibration/batch_entropy_100bins": 0.4893215163923771,
|
|
"calibration/batch_entropy_10bins": 0.6581675725952678,
|
|
"calibration/batch_entropy_50bins": 0.5712117371457062,
|
|
"calibration/batch_uniqueness": 0.7298019114502236,
|
|
"calibration/confidence_entropy": 0.34160307185532907,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.5249416015362247,
|
|
"calibration/mean_confidence": 0.7863750362684165,
|
|
"calibration/prompt_uniqueness": 0.6165276543074268,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0361328125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1505.8,
|
|
"completions/mean_length": 264.63974609375,
|
|
"completions/mean_terminated_length": 216.997314453125,
|
|
"completions/min_length": 2.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.02532093971967697,
|
|
"learning_rate": 6.249999999999999e-07,
|
|
"loss": 0.0755,
|
|
"num_tokens": 35426373.0,
|
|
"reward": 0.5459524154663086,
|
|
"reward_std": 0.39404299259185793,
|
|
"rewards/accuracy_reward": 0.20927734375,
|
|
"rewards/brier_reward": 0.3790619194507599,
|
|
"rewards/confidence_uniqueness_reward": 0.5123092293739319,
|
|
"rewards/format_reward": 0.71435546875,
|
|
"rewards/frontier_aurc_reward": 0.29989256858825686,
|
|
"rewards/frontier_coverage_0": 0.29989256858825686,
|
|
"rewards/frontier_coverage_1": 0.29989256858825686,
|
|
"rewards/frontier_coverage_10": 0.29989256858825686,
|
|
"rewards/frontier_coverage_15": 0.29989256858825686,
|
|
"rewards/frontier_coverage_20": 0.29989256858825686,
|
|
"rewards/frontier_coverage_25": 0.29989256858825686,
|
|
"rewards/frontier_coverage_5": 0.29989256858825686,
|
|
"rewards/frontier_ece_reward": 0.29989256858825686,
|
|
"rewards/frontier_entropy_batch_reward": -0.649796187877655,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.225921630859375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.27287338972091674,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.31875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1129608154296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1129608154296875,
|
|
"signal/advantage_abs_mean": 0.3253436267375946,
|
|
"signal/advantage_pre_scale_abs_mean": 0.3253436267375946,
|
|
"signal/advantage_pre_scale_std": 0.403624951839447,
|
|
"signal/advantage_std": 0.403624951839447,
|
|
"signal/brier_reward/centered_abs_mean": 0.3087932109832764,
|
|
"signal/brier_reward/group_bin_occupancy": 0.76484375,
|
|
"signal/brier_reward/group_std_mean": 0.35776872634887696,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030879321694374084,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.030879321694374084,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.28355550169944765,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.593359375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.3405479848384857,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028355551511049272,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.028355551511049272,
|
|
"signal/format_reward/centered_abs_mean": 0.376983642578125,
|
|
"signal/format_reward/group_bin_occupancy": 0.25,
|
|
"signal/format_reward/group_std_mean": 0.43766148686408995,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1884918212890625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1884918212890625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.28301833271980287,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.67578125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.33786413073539734,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.28301833271980287,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.67578125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.33786413073539734,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.28301833271980287,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.67578125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.33786413073539734,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.28301833271980287,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.67578125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.33786413073539734,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.28301833271980287,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.67578125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.33786413073539734,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.28301833271980287,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.67578125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.33786413073539734,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.28301833271980287,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.67578125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.33786413073539734,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.28301833271980287,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.67578125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.33786413073539734,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003537729289382696,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.28301833271980287,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.67578125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.33786413073539734,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02830183431506157,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02830183431506157,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42037245631217957,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.391796875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4748634576797485,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.042037245631217954,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042037245631217954,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5960428535605962,
|
|
"calibration/batch_distribution_entropy": 0.6396332627525727,
|
|
"calibration/batch_entropy_100bins": 0.48268498625474204,
|
|
"calibration/batch_entropy_10bins": 0.6396332627525727,
|
|
"calibration/batch_entropy_50bins": 0.5648247374535533,
|
|
"calibration/batch_uniqueness": 0.7106374576366465,
|
|
"calibration/buffer_distribution_entropy": 0.6633551500439849,
|
|
"calibration/buffer_entropy_100bins": 0.4946308797642045,
|
|
"calibration/buffer_entropy_10bins": 0.6633551500439849,
|
|
"calibration/buffer_entropy_50bins": 0.5778719937765917,
|
|
"calibration/confidence_entropy": 0.34154283209851816,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.48102077114836533,
|
|
"calibration/mean_confidence": 0.8027908687275225,
|
|
"calibration/prompt_uniqueness": 0.6133351997970939,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0189453125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1433.8,
|
|
"completions/mean_length": 204.99287109375,
|
|
"completions/mean_terminated_length": 179.41653137207032,
|
|
"completions/min_length": 2.6,
|
|
"completions/min_terminated_length": 2.6,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.016035104170441628,
|
|
"learning_rate": 9.374999999999999e-07,
|
|
"loss": 0.0584,
|
|
"num_tokens": 52574236.0,
|
|
"reward": 0.6605345129966735,
|
|
"reward_std": 0.3093711197376251,
|
|
"rewards/accuracy_reward": 0.266015625,
|
|
"rewards/brier_reward": 0.4797984719276428,
|
|
"rewards/confidence_uniqueness_reward": 0.6384814620018006,
|
|
"rewards/format_reward": 0.87236328125,
|
|
"rewards/frontier_aurc_reward": 0.29653857182711363,
|
|
"rewards/frontier_coverage_0": 0.3118060424923897,
|
|
"rewards/frontier_coverage_1": 0.3118060424923897,
|
|
"rewards/frontier_coverage_10": 0.3118060424923897,
|
|
"rewards/frontier_coverage_15": 0.3118060424923897,
|
|
"rewards/frontier_coverage_20": 0.3118060424923897,
|
|
"rewards/frontier_coverage_25": 0.3118060424923897,
|
|
"rewards/frontier_coverage_5": 0.3118060424923897,
|
|
"rewards/frontier_ece_reward": 0.2838048979640007,
|
|
"rewards/frontier_entropy_batch_reward": -0.798531997203827,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.197265625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.24309078156948088,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0986328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0986328125,
|
|
"signal/advantage_abs_mean": 0.2367635428905487,
|
|
"signal/advantage_pre_scale_abs_mean": 0.2367635428905487,
|
|
"signal/advantage_pre_scale_std": 0.32340609431266787,
|
|
"signal/advantage_std": 0.32340609431266787,
|
|
"signal/brier_reward/centered_abs_mean": 0.27546623945236204,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8015625,
|
|
"signal/brier_reward/group_std_mean": 0.328661048412323,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027546624094247817,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.027546624094247817,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.20335004329681397,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.601953125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.2676876664161682,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020335004664957523,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.020335004664957523,
|
|
"signal/format_reward/centered_abs_mean": 0.205865478515625,
|
|
"signal/format_reward/group_bin_occupancy": 0.240625,
|
|
"signal/format_reward/group_std_mean": 0.30276075601577757,
|
|
"signal/format_reward/group_zero_std_frac": 0.075,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1029327392578125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1029327392578125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.2184015023522079,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.715234375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.2625438742339611,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.002730018919100985,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.002730018919100985,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2380078285932541,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.6953125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2922509163618088,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0029750979971140622,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029750979971140622,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2380078285932541,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.6953125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2922509163618088,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029750979971140622,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029750979971140622,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2380078285932541,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.6953125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2922509163618088,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029750979971140622,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029750979971140622,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2380078285932541,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.6953125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2922509163618088,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029750979971140622,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029750979971140622,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2380078285932541,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.6953125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2922509163618088,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029750979971140622,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029750979971140622,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2380078285932541,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.6953125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2922509163618088,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029750979971140622,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029750979971140622,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2380078285932541,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.6953125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2922509163618088,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029750979971140622,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029750979971140622,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.24219779670238495,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.692578125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.29229960441589353,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.024219780787825586,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.024219780787825586,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2959599316120148,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.39140625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39782981276512147,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02959599420428276,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02959599420428276,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5208466666721476,
|
|
"calibration/batch_distribution_entropy": 0.6984473949280432,
|
|
"calibration/batch_entropy_100bins": 0.5226043753565173,
|
|
"calibration/batch_entropy_10bins": 0.6984473949280432,
|
|
"calibration/batch_entropy_50bins": 0.6099272547110044,
|
|
"calibration/batch_uniqueness": 0.7616123918390276,
|
|
"calibration/buffer_distribution_entropy": 0.6589676652976166,
|
|
"calibration/buffer_entropy_100bins": 0.49633332911980316,
|
|
"calibration/buffer_entropy_10bins": 0.6589676652976166,
|
|
"calibration/buffer_entropy_50bins": 0.5794713007771074,
|
|
"calibration/confidence_entropy": 0.36403103435288126,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3806335949122962,
|
|
"calibration/mean_confidence": 0.7821886770336945,
|
|
"calibration/prompt_uniqueness": 0.667976662783546,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.003515625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1149.6,
|
|
"completions/mean_length": 140.0291015625,
|
|
"completions/mean_terminated_length": 135.11297149658202,
|
|
"completions/min_length": 25.2,
|
|
"completions/min_terminated_length": 25.2,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.00845023151487112,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0121,
|
|
"num_tokens": 68926534.0,
|
|
"reward": 0.706857168674469,
|
|
"reward_std": 0.20300790965557097,
|
|
"rewards/accuracy_reward": 0.34228515625,
|
|
"rewards/brier_reward": 0.5721023082733154,
|
|
"rewards/confidence_uniqueness_reward": 0.7548041224479676,
|
|
"rewards/format_reward": 0.9798828125,
|
|
"rewards/frontier_aurc_reward": -0.0069354880601167675,
|
|
"rewards/frontier_coverage_0": 0.06275556683540344,
|
|
"rewards/frontier_coverage_1": 0.06275556683540344,
|
|
"rewards/frontier_coverage_10": 0.06275556683540344,
|
|
"rewards/frontier_coverage_15": 0.06275556683540344,
|
|
"rewards/frontier_coverage_20": 0.06275556683540344,
|
|
"rewards/frontier_coverage_25": 0.06275556683540344,
|
|
"rewards/frontier_coverage_5": 0.06275556683540344,
|
|
"rewards/frontier_ece_reward": -0.05814636992290616,
|
|
"rewards/frontier_entropy_batch_reward": -0.8650725841522217,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.201666259765625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.206640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.25092312395572663,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.346875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1008331298828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1008331298828125,
|
|
"signal/advantage_abs_mean": 0.1547175496816635,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1547175496816635,
|
|
"signal/advantage_pre_scale_std": 0.2182164669036865,
|
|
"signal/advantage_std": 0.2182164669036865,
|
|
"signal/brier_reward/centered_abs_mean": 0.2411728620529175,
|
|
"signal/brier_reward/group_bin_occupancy": 0.828125,
|
|
"signal/brier_reward/group_std_mean": 0.2961599349975586,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02411728650331497,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02411728650331497,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.11858726739883423,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6859375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.15527499318122864,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011858727037906646,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011858727037906646,
|
|
"signal/format_reward/centered_abs_mean": 0.03802490234375,
|
|
"signal/format_reward/group_bin_occupancy": 0.18125,
|
|
"signal/format_reward/group_std_mean": 0.09205524399876594,
|
|
"signal/format_reward/group_zero_std_frac": 0.55,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.019012451171875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.019012451171875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004820964112877846,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.74609375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00661336500197649,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.0262053739279506e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.0262053739279506e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.10514531433582305,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.6875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.16418030858039856,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013143164571374655,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013143164571374655,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10514531433582305,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.6875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16418030858039856,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013143164571374655,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013143164571374655,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10514531433582305,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.6875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16418030858039856,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013143164571374655,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013143164571374655,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10514531433582305,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.6875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16418030858039856,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013143164571374655,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013143164571374655,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10514531433582305,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.6875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16418030858039856,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013143164571374655,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013143164571374655,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10514531433582305,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.6875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16418030858039856,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013143164571374655,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013143164571374655,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10514531433582305,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.6875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16418030858039856,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013143164571374655,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013143164571374655,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.1306596964597702,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.674609375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.16075450479984282,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013065969571471214,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013065969571471214,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21918058693408965,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.380859375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3507233917713165,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.071875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021918059140443803,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021918059140443803,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6159178774567231,
|
|
"calibration/batch_distribution_entropy": 0.8289744360363516,
|
|
"calibration/batch_entropy_100bins": 0.6255120655845456,
|
|
"calibration/batch_entropy_10bins": 0.8289744360363516,
|
|
"calibration/batch_entropy_50bins": 0.7153792756260304,
|
|
"calibration/batch_uniqueness": 0.8485990399892221,
|
|
"calibration/buffer_distribution_entropy": 0.6896777359021345,
|
|
"calibration/buffer_entropy_100bins": 0.520473966835383,
|
|
"calibration/buffer_entropy_10bins": 0.6896777359021345,
|
|
"calibration/buffer_entropy_50bins": 0.6052089462001826,
|
|
"calibration/confidence_entropy": 0.43118102390537255,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3893414577098084,
|
|
"calibration/mean_confidence": 0.7016207582801898,
|
|
"calibration/prompt_uniqueness": 0.7763101453654324,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00087890625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 825.0,
|
|
"completions/mean_length": 115.275390625,
|
|
"completions/mean_terminated_length": 114.02607116699218,
|
|
"completions/min_length": 29.0,
|
|
"completions/min_terminated_length": 29.0,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.004183254204690456,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0015,
|
|
"num_tokens": 85040106.0,
|
|
"reward": 0.7534348726272583,
|
|
"reward_std": 0.18122220635414124,
|
|
"rewards/accuracy_reward": 0.36455078125,
|
|
"rewards/brier_reward": 0.6307034969329834,
|
|
"rewards/confidence_uniqueness_reward": 0.847357702255249,
|
|
"rewards/format_reward": 0.99326171875,
|
|
"rewards/frontier_aurc_reward": -0.006123499572277069,
|
|
"rewards/frontier_coverage_0": 0.07438097894191742,
|
|
"rewards/frontier_coverage_1": 0.07438097894191742,
|
|
"rewards/frontier_coverage_10": 0.07438097894191742,
|
|
"rewards/frontier_coverage_15": 0.07438097894191742,
|
|
"rewards/frontier_coverage_20": 0.07438097894191742,
|
|
"rewards/frontier_coverage_25": 0.07438097894191742,
|
|
"rewards/frontier_coverage_5": 0.07438097894191742,
|
|
"rewards/frontier_ece_reward": -0.03736944012343883,
|
|
"rewards/frontier_entropy_batch_reward": -0.7597236037254333,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.192083740234375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.206640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.2422287493944168,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.346875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0960418701171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0960418701171875,
|
|
"signal/advantage_abs_mean": 0.14200334548950194,
|
|
"signal/advantage_pre_scale_abs_mean": 0.14200334548950194,
|
|
"signal/advantage_pre_scale_std": 0.19599647223949432,
|
|
"signal/advantage_std": 0.19599647223949432,
|
|
"signal/brier_reward/centered_abs_mean": 0.23228658139705657,
|
|
"signal/brier_reward/group_bin_occupancy": 0.88359375,
|
|
"signal/brier_reward/group_std_mean": 0.28496087789535524,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023228658363223076,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.023228658363223076,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0650908425450325,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.745703125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09223922342061996,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006509084347635507,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006509084347635507,
|
|
"signal/format_reward/centered_abs_mean": 0.012933349609375,
|
|
"signal/format_reward/group_bin_occupancy": 0.148046875,
|
|
"signal/format_reward/group_std_mean": 0.03475438989698887,
|
|
"signal/format_reward/group_zero_std_frac": 0.815625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0064666748046875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0064666748046875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00406914739869535,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.739453125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.006008286867290736,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.0864344666479154e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.0864344666479154e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15233553797006608,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.795703125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22025286853313447,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019041943131014704,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019041943131014704,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15233553797006608,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.795703125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22025286853313447,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019041943131014704,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019041943131014704,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15233553797006608,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.795703125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22025286853313447,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019041943131014704,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019041943131014704,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15233553797006608,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.795703125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22025286853313447,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019041943131014704,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019041943131014704,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15233553797006608,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.795703125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22025286853313447,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019041943131014704,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019041943131014704,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.15233553797006608,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.795703125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22025286853313447,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019041943131014704,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019041943131014704,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15233553797006608,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.795703125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22025286853313447,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019041943131014704,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019041943131014704,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.11693819165229798,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.14299911260604858,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011693819798529148,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011693819798529148,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34947873950004577,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.546875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.46870680451393126,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03494787439703941,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03494787439703941,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6253142563211475,
|
|
"calibration/batch_distribution_entropy": 0.9522747813721082,
|
|
"calibration/batch_entropy_100bins": 0.8099158365008308,
|
|
"calibration/batch_entropy_10bins": 0.9522747813721082,
|
|
"calibration/batch_entropy_50bins": 0.8759764029517731,
|
|
"calibration/batch_uniqueness": 0.9186538181527467,
|
|
"calibration/buffer_distribution_entropy": 0.7582834328496173,
|
|
"calibration/buffer_entropy_100bins": 0.580513610120933,
|
|
"calibration/buffer_entropy_10bins": 0.7582834328496173,
|
|
"calibration/buffer_entropy_50bins": 0.6668539149358436,
|
|
"calibration/confidence_entropy": 0.5189439527113646,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.006274509803921568,
|
|
"calibration/coverage@30%": 0.006274509803921568,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.2600914235574404,
|
|
"calibration/mean_confidence": 0.5131853527628318,
|
|
"calibration/prompt_uniqueness": 0.8575710010279367,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0013671875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 549.0,
|
|
"completions/mean_length": 111.59697265625,
|
|
"completions/mean_terminated_length": 109.64572296142578,
|
|
"completions/min_length": 35.8,
|
|
"completions/min_terminated_length": 35.8,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.00540167186409235,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0045,
|
|
"num_tokens": 101227467.0,
|
|
"reward": 0.8009824633598328,
|
|
"reward_std": 0.1627292662858963,
|
|
"rewards/accuracy_reward": 0.3607421875,
|
|
"rewards/brier_reward": 0.6999990940093994,
|
|
"rewards/confidence_uniqueness_reward": 0.9195750474929809,
|
|
"rewards/format_reward": 0.99453125,
|
|
"rewards/frontier_aurc_reward": -0.005178525112569332,
|
|
"rewards/frontier_coverage_0": 0.11951842457056046,
|
|
"rewards/frontier_coverage_1": 0.11951842457056046,
|
|
"rewards/frontier_coverage_10": 0.11951842457056046,
|
|
"rewards/frontier_coverage_15": 0.11951842457056046,
|
|
"rewards/frontier_coverage_20": 0.11951842457056046,
|
|
"rewards/frontier_coverage_25": 0.11951842457056046,
|
|
"rewards/frontier_coverage_5": 0.11951842457056046,
|
|
"rewards/frontier_ece_reward": -0.016483052633702755,
|
|
"rewards/frontier_entropy_batch_reward": -0.47356472015380857,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1861328125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.235753658413887,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.36875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09306640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09306640625,
|
|
"signal/advantage_abs_mean": 0.12720103561878204,
|
|
"signal/advantage_pre_scale_abs_mean": 0.12720103561878204,
|
|
"signal/advantage_pre_scale_std": 0.1749451279640198,
|
|
"signal/advantage_std": 0.1749451279640198,
|
|
"signal/brier_reward/centered_abs_mean": 0.2198871850967407,
|
|
"signal/brier_reward/group_bin_occupancy": 0.91796875,
|
|
"signal/brier_reward/group_std_mean": 0.270095694065094,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02198871858417988,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02198871858417988,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.047896023094654086,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.71328125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.07273447662591934,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004789602383971215,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004789602383971215,
|
|
"signal/format_reward/centered_abs_mean": 0.010546875,
|
|
"signal/format_reward/group_bin_occupancy": 0.1453125,
|
|
"signal/format_reward/group_std_mean": 0.029590686410665513,
|
|
"signal/format_reward/group_zero_std_frac": 0.8375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0052734375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0052734375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024232265073806047,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.728125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038812434300780295,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0290332506410778e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0290332506410778e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.24059977233409882,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.915625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.30937875509262086,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0030074971728026867,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030074971728026867,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.24059977233409882,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.915625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.30937875509262086,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030074971728026867,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030074971728026867,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.24059977233409882,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.915625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.30937875509262086,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030074971728026867,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030074971728026867,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.24059977233409882,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.915625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.30937875509262086,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030074971728026867,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030074971728026867,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.24059977233409882,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.915625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.30937875509262086,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030074971728026867,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030074971728026867,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.24059977233409882,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.915625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.30937875509262086,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030074971728026867,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030074971728026867,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.24059977233409882,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.915625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.30937875509262086,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030074971728026867,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030074971728026867,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.09630160331726074,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.794140625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.11887068897485734,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009630160499364137,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009630160499364137,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.45851866006851194,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.758984375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5251657009124756,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04585186541080475,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04585186541080475,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5182934023322565,
|
|
"calibration/batch_distribution_entropy": 0.9200624959435266,
|
|
"calibration/batch_entropy_100bins": 0.9203410859236761,
|
|
"calibration/batch_entropy_10bins": 0.9200624959435266,
|
|
"calibration/batch_entropy_50bins": 0.9307206167012977,
|
|
"calibration/batch_uniqueness": 0.9428976783143688,
|
|
"calibration/buffer_distribution_entropy": 0.8451822558633515,
|
|
"calibration/buffer_entropy_100bins": 0.6813660232509321,
|
|
"calibration/buffer_entropy_10bins": 0.8451822558633515,
|
|
"calibration/buffer_entropy_50bins": 0.7589156786572054,
|
|
"calibration/confidence_entropy": 0.4994208357830646,
|
|
"calibration/coverage@0%": 0.001573256191699024,
|
|
"calibration/coverage@1%": 0.001573256191699024,
|
|
"calibration/coverage@10%": 0.001573256191699024,
|
|
"calibration/coverage@15%": 0.001573256191699024,
|
|
"calibration/coverage@20%": 0.001573256191699024,
|
|
"calibration/coverage@25%": 0.0023606577665021737,
|
|
"calibration/coverage@30%": 0.009433349318565041,
|
|
"calibration/coverage@5%": 0.001573256191699024,
|
|
"calibration/ece": 0.19128083609792296,
|
|
"calibration/mean_confidence": 0.3484087206325151,
|
|
"calibration/prompt_uniqueness": 0.8813607651534859,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00205078125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 666.6,
|
|
"completions/mean_length": 113.24580078125,
|
|
"completions/mean_terminated_length": 110.32241821289062,
|
|
"completions/min_length": 37.2,
|
|
"completions/min_terminated_length": 37.2,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.004447246436029673,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0067,
|
|
"num_tokens": 117496576.0,
|
|
"reward": 0.8329338908195496,
|
|
"reward_std": 0.1337667301297188,
|
|
"rewards/accuracy_reward": 0.3892578125,
|
|
"rewards/brier_reward": 0.7226597905158997,
|
|
"rewards/confidence_uniqueness_reward": 0.9372900366783142,
|
|
"rewards/format_reward": 0.99619140625,
|
|
"rewards/frontier_aurc_reward": -0.0045765116810798645,
|
|
"rewards/frontier_coverage_0": 0.14142859876155853,
|
|
"rewards/frontier_coverage_1": 0.14142859876155853,
|
|
"rewards/frontier_coverage_10": 0.14142859876155853,
|
|
"rewards/frontier_coverage_15": 0.14142859876155853,
|
|
"rewards/frontier_coverage_20": 0.14142859876155853,
|
|
"rewards/frontier_coverage_25": 0.14142859876155853,
|
|
"rewards/frontier_coverage_5": 0.14142859876155853,
|
|
"rewards/frontier_ece_reward": -0.00010334124672226608,
|
|
"rewards/frontier_entropy_batch_reward": -0.3809317171573639,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18907470703125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.205859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.23918051719665528,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.353125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.094537353515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.094537353515625,
|
|
"signal/advantage_abs_mean": 0.10340933352708817,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10340933352708817,
|
|
"signal/advantage_pre_scale_std": 0.14824790954589845,
|
|
"signal/advantage_std": 0.14824790954589845,
|
|
"signal/brier_reward/centered_abs_mean": 0.1997154474258423,
|
|
"signal/brier_reward/group_bin_occupancy": 0.876171875,
|
|
"signal/brier_reward/group_std_mean": 0.251619490981102,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019971545413136484,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019971545413136484,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030846378952264785,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.85234375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.049375799298286435,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030846379697322844,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030846379697322844,
|
|
"signal/format_reward/centered_abs_mean": 0.007379150390625,
|
|
"signal/format_reward/group_bin_occupancy": 0.140234375,
|
|
"signal/format_reward/group_std_mean": 0.02154465951025486,
|
|
"signal/format_reward/group_zero_std_frac": 0.878125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0036895751953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0036895751953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013135876040905714,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002168184705078602,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.641984490561299e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.641984490561299e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.3162419438362122,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3914815127849579,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003953024419024587,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003953024419024587,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.3162419438362122,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3914815127849579,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003953024419024587,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003953024419024587,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3162419438362122,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3914815127849579,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003953024419024587,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003953024419024587,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3162419438362122,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3914815127849579,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003953024419024587,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003953024419024587,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3162419438362122,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3914815127849579,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003953024419024587,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003953024419024587,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3162419438362122,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3914815127849579,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003953024419024587,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003953024419024587,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.3162419438362122,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3914815127849579,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003953024419024587,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003953024419024587,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.052671823650598526,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6921875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07692344933748245,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005267182365059853,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005267182365059853,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4283927083015442,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.822265625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.49359052777290346,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04283927157521248,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04283927157521248,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5740054027668963,
|
|
"calibration/batch_distribution_entropy": 0.9066430352384233,
|
|
"calibration/batch_entropy_100bins": 0.922959840976052,
|
|
"calibration/batch_entropy_10bins": 0.9066430352384233,
|
|
"calibration/batch_entropy_50bins": 0.9289300475616054,
|
|
"calibration/batch_uniqueness": 0.9406546241127016,
|
|
"calibration/buffer_distribution_entropy": 0.9044162694042044,
|
|
"calibration/buffer_entropy_100bins": 0.7619221563316753,
|
|
"calibration/buffer_entropy_10bins": 0.9044162694042044,
|
|
"calibration/buffer_entropy_50bins": 0.8276472839940714,
|
|
"calibration/confidence_entropy": 0.5025416372663043,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.18815817750495797,
|
|
"calibration/mean_confidence": 0.33185162881752184,
|
|
"calibration/prompt_uniqueness": 0.8810801774861978,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0017578125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 758.6,
|
|
"completions/mean_length": 120.3833984375,
|
|
"completions/mean_terminated_length": 117.88789825439453,
|
|
"completions/min_length": 42.0,
|
|
"completions/min_terminated_length": 42.0,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.00149272452108562,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0059,
|
|
"num_tokens": 133645974.0,
|
|
"reward": 0.8357627868652344,
|
|
"reward_std": 0.12221252173185349,
|
|
"rewards/accuracy_reward": 0.3927734375,
|
|
"rewards/brier_reward": 0.7258547782897949,
|
|
"rewards/confidence_uniqueness_reward": 0.9388667583465576,
|
|
"rewards/format_reward": 0.99716796875,
|
|
"rewards/frontier_aurc_reward": -0.004377355705946684,
|
|
"rewards/frontier_coverage_0": 0.13741703778505326,
|
|
"rewards/frontier_coverage_1": 0.13741703778505326,
|
|
"rewards/frontier_coverage_10": 0.13741703778505326,
|
|
"rewards/frontier_coverage_15": 0.13741703778505326,
|
|
"rewards/frontier_coverage_20": 0.13741703778505326,
|
|
"rewards/frontier_coverage_25": 0.13741703778505326,
|
|
"rewards/frontier_coverage_5": 0.13741703778505326,
|
|
"rewards/frontier_ece_reward": 0.003349437890574336,
|
|
"rewards/frontier_entropy_batch_reward": -0.3798429071903229,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17041015625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.2015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.2198496311903,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.085205078125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.085205078125,
|
|
"signal/advantage_abs_mean": 0.09425371885299683,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09425371885299683,
|
|
"signal/advantage_pre_scale_std": 0.13775794506072997,
|
|
"signal/advantage_std": 0.13775794506072997,
|
|
"signal/brier_reward/centered_abs_mean": 0.18953997492790223,
|
|
"signal/brier_reward/group_bin_occupancy": 0.882421875,
|
|
"signal/brier_reward/group_std_mean": 0.23842448592185975,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018953998014330863,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018953998014330863,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025395025685429572,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.911328125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03915891274809837,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002539502549916506,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002539502549916506,
|
|
"signal/format_reward/centered_abs_mean": 0.005462646484375,
|
|
"signal/format_reward/group_bin_occupancy": 0.135546875,
|
|
"signal/format_reward/group_std_mean": 0.015347770974040031,
|
|
"signal/format_reward/group_zero_std_frac": 0.915625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0027313232421875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0027313232421875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001331974472850561,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.783203125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0020305470563471316,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.6649681492708622e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.6649681492708622e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.3050907075405121,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.938671875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.37571829557418823,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0038136340212076903,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0038136340212076903,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.3050907075405121,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.938671875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.37571829557418823,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038136340212076903,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038136340212076903,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3050907075405121,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.938671875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.37571829557418823,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038136340212076903,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038136340212076903,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3050907075405121,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.938671875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.37571829557418823,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038136340212076903,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038136340212076903,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3050907075405121,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.938671875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.37571829557418823,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038136340212076903,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038136340212076903,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3050907075405121,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.938671875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.37571829557418823,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038136340212076903,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038136340212076903,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.3050907075405121,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.938671875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.37571829557418823,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038136340212076903,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038136340212076903,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.048742403835058214,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.72265625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07104799449443817,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004874240513890982,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004874240513890982,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.40303301215171816,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.857421875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4729976952075958,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.040303300321102145,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.040303300321102145,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.38396500517480436,
|
|
"calibration/batch_distribution_entropy": 0.967181094225583,
|
|
"calibration/batch_entropy_100bins": 0.9534893860190756,
|
|
"calibration/batch_entropy_10bins": 0.967181094225583,
|
|
"calibration/batch_entropy_50bins": 0.9683228737901771,
|
|
"calibration/batch_uniqueness": 0.9549240591535882,
|
|
"calibration/buffer_distribution_entropy": 0.9348020467410049,
|
|
"calibration/buffer_entropy_100bins": 0.8141507430697466,
|
|
"calibration/buffer_entropy_10bins": 0.9348020467410049,
|
|
"calibration/buffer_entropy_50bins": 0.8695878210283782,
|
|
"calibration/confidence_entropy": 0.5490850423679848,
|
|
"calibration/coverage@0%": 0.0027389615949119372,
|
|
"calibration/coverage@1%": 0.0027389615949119372,
|
|
"calibration/coverage@10%": 0.005087298189823874,
|
|
"calibration/coverage@15%": 0.06289979818982387,
|
|
"calibration/coverage@20%": 0.11407167318982388,
|
|
"calibration/coverage@25%": 0.21096043297455966,
|
|
"calibration/coverage@30%": 0.21800085616438353,
|
|
"calibration/coverage@5%": 0.0027389615949119372,
|
|
"calibration/ece": 0.20184070224738887,
|
|
"calibration/mean_confidence": 0.45353375496514436,
|
|
"calibration/prompt_uniqueness": 0.8953690003902185,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 592.2,
|
|
"completions/mean_length": 131.33984375,
|
|
"completions/mean_terminated_length": 130.2405517578125,
|
|
"completions/min_length": 46.2,
|
|
"completions/min_terminated_length": 46.2,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.0017877706559374928,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0037,
|
|
"num_tokens": 149941326.0,
|
|
"reward": 0.9030540823936463,
|
|
"reward_std": 0.13035276234149934,
|
|
"rewards/accuracy_reward": 0.50322265625,
|
|
"rewards/brier_reward": 0.7159843802452087,
|
|
"rewards/confidence_uniqueness_reward": 0.9531710863113403,
|
|
"rewards/format_reward": 0.9986328125,
|
|
"rewards/frontier_aurc_reward": -0.0038798670284450054,
|
|
"rewards/frontier_coverage_0": 0.033135686349123714,
|
|
"rewards/frontier_coverage_1": 0.033135686349123714,
|
|
"rewards/frontier_coverage_10": 0.033135686349123714,
|
|
"rewards/frontier_coverage_15": 0.033135686349123714,
|
|
"rewards/frontier_coverage_20": 0.033135686349123714,
|
|
"rewards/frontier_coverage_25": 0.033135686349123714,
|
|
"rewards/frontier_coverage_5": 0.033135686349123714,
|
|
"rewards/frontier_ece_reward": 0.010891084442846477,
|
|
"rewards/frontier_entropy_batch_reward": -0.18729186952114105,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.165521240234375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.2,
|
|
"signal/accuracy_reward/group_std_mean": 0.21491027772426605,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0827606201171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0827606201171875,
|
|
"signal/advantage_abs_mean": 0.10254481285810471,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10254481285810471,
|
|
"signal/advantage_pre_scale_std": 0.1436397671699524,
|
|
"signal/advantage_std": 0.1436397671699524,
|
|
"signal/brier_reward/centered_abs_mean": 0.19424692094326018,
|
|
"signal/brier_reward/group_bin_occupancy": 0.930078125,
|
|
"signal/brier_reward/group_std_mean": 0.24112644791603088,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019424692168831824,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019424692168831824,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01740786787122488,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93984375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02573142237961292,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0017407866893336178,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017407866893336178,
|
|
"signal/format_reward/centered_abs_mean": 0.00264892578125,
|
|
"signal/format_reward/group_bin_occupancy": 0.13046875,
|
|
"signal/format_reward/group_std_mean": 0.007733980286866426,
|
|
"signal/format_reward/group_zero_std_frac": 0.95625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.001324462890625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018934236606583,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7984375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0027514519169926643,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.366779626754578e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.366779626754578e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2569372057914734,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.95078125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3219131588935852,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00321171497926116,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00321171497926116,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2569372057914734,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.95078125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3219131588935852,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00321171497926116,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00321171497926116,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2569372057914734,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.95078125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3219131588935852,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00321171497926116,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00321171497926116,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2569372057914734,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.95078125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3219131588935852,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00321171497926116,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00321171497926116,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2569372057914734,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.95078125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3219131588935852,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00321171497926116,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00321171497926116,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2569372057914734,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.95078125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3219131588935852,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00321171497926116,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00321171497926116,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2569372057914734,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.95078125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3219131588935852,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00321171497926116,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00321171497926116,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0656904973089695,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.807421875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08761606812477112,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0065690501593053344,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0065690501593053344,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27559973001480104,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.846484375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3480221152305603,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027559973299503326,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027559973299503326,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.44252966701627444,
|
|
"calibration/batch_distribution_entropy": 0.9766154522720358,
|
|
"calibration/batch_entropy_100bins": 0.9551690729484854,
|
|
"calibration/batch_entropy_10bins": 0.9766154522720358,
|
|
"calibration/batch_entropy_50bins": 0.9720948064609075,
|
|
"calibration/batch_uniqueness": 0.9577404992006823,
|
|
"calibration/buffer_distribution_entropy": 0.9504300762276795,
|
|
"calibration/buffer_entropy_100bins": 0.8514806183722019,
|
|
"calibration/buffer_entropy_10bins": 0.9504300762276795,
|
|
"calibration/buffer_entropy_50bins": 0.8980914859901915,
|
|
"calibration/confidence_entropy": 0.537290060462475,
|
|
"calibration/coverage@0%": 0.002737435727715744,
|
|
"calibration/coverage@1%": 0.002737435727715744,
|
|
"calibration/coverage@10%": 0.00625994062008365,
|
|
"calibration/coverage@15%": 0.00625994062008365,
|
|
"calibration/coverage@20%": 0.010564459944936879,
|
|
"calibration/coverage@25%": 0.021541782380760523,
|
|
"calibration/coverage@30%": 0.04388704925463336,
|
|
"calibration/coverage@5%": 0.002737435727715744,
|
|
"calibration/ece": 0.14565709539204225,
|
|
"calibration/mean_confidence": 0.5353336754902873,
|
|
"calibration/prompt_uniqueness": 0.8973251380669008,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1338.4,
|
|
"completions/max_terminated_length": 639.8,
|
|
"completions/mean_length": 139.0447265625,
|
|
"completions/mean_terminated_length": 138.36163940429688,
|
|
"completions/min_length": 53.8,
|
|
"completions/min_terminated_length": 53.8,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.0017825138056650758,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 166386072.0,
|
|
"reward": 0.8922484874725342,
|
|
"reward_std": 0.13068339228630066,
|
|
"rewards/accuracy_reward": 0.46005859375,
|
|
"rewards/brier_reward": 0.7271055817604065,
|
|
"rewards/confidence_uniqueness_reward": 0.9578215956687928,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.004173227492719889,
|
|
"rewards/frontier_coverage_0": 0.07299970909953117,
|
|
"rewards/frontier_coverage_1": 0.07299970909953117,
|
|
"rewards/frontier_coverage_10": 0.07299970909953117,
|
|
"rewards/frontier_coverage_15": 0.07299970909953117,
|
|
"rewards/frontier_coverage_20": 0.07299970909953117,
|
|
"rewards/frontier_coverage_25": 0.07299970909953117,
|
|
"rewards/frontier_coverage_5": 0.07299970909953117,
|
|
"rewards/frontier_ece_reward": 0.011208084784448147,
|
|
"rewards/frontier_entropy_batch_reward": -0.13387853503227234,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.156707763671875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.19375,
|
|
"signal/accuracy_reward/group_std_mean": 0.20061389803886415,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.45,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0783538818359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0783538818359375,
|
|
"signal/advantage_abs_mean": 0.10416142642498016,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10416142642498016,
|
|
"signal/advantage_pre_scale_std": 0.14782364070415496,
|
|
"signal/advantage_std": 0.14782364070415496,
|
|
"signal/brier_reward/centered_abs_mean": 0.19167569279670715,
|
|
"signal/brier_reward/group_bin_occupancy": 0.9265625,
|
|
"signal/brier_reward/group_std_mean": 0.23840481042861938,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019167570024728776,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019167570024728776,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013879508711397648,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.948828125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018870834633708,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013879508711397647,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013879508711397647,
|
|
"signal/format_reward/centered_abs_mean": 0.001312255859375,
|
|
"signal/format_reward/group_bin_occupancy": 0.12734375,
|
|
"signal/format_reward/group_std_mean": 0.0035306816454976795,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025133413262665273,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.814453125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003546137036755681,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.141676788800396e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.141676788800396e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21136297285556793,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.922265625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2744253635406494,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002642037160694599,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002642037160694599,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21136297285556793,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.922265625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2744253635406494,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002642037160694599,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002642037160694599,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21136297285556793,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.922265625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2744253635406494,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002642037160694599,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002642037160694599,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21136297285556793,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.922265625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2744253635406494,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002642037160694599,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002642037160694599,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21136297285556793,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.922265625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2744253635406494,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002642037160694599,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002642037160694599,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21136297285556793,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.922265625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2744253635406494,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002642037160694599,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002642037160694599,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21136297285556793,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.922265625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2744253635406494,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002642037160694599,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002642037160694599,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.07558847218751907,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.81328125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.09702952355146408,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0075588468462228775,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0075588468462228775,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21678448021411895,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.81953125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2975514531135559,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021678448468446732,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021678448468446732,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"eval_calibration/aurc": 0.5640482398747311,
|
|
"eval_calibration/batch_distribution_entropy": 0.9338984167163116,
|
|
"eval_calibration/batch_entropy_100bins": 0.6999477876337099,
|
|
"eval_calibration/batch_entropy_10bins": 0.9338984167163116,
|
|
"eval_calibration/batch_entropy_50bins": 0.796822593270633,
|
|
"eval_calibration/batch_uniqueness": 0.9091796875,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9562562919532525,
|
|
"eval_calibration/buffer_entropy_100bins": 0.8692259796843912,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9562562919532525,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9110833803907854,
|
|
"eval_calibration/confidence_entropy": 0.5144369135960669,
|
|
"eval_calibration/coverage@0%": 0.0078125,
|
|
"eval_calibration/coverage@1%": 0.0078125,
|
|
"eval_calibration/coverage@10%": 0.0078125,
|
|
"eval_calibration/coverage@15%": 0.0078125,
|
|
"eval_calibration/coverage@20%": 0.046875,
|
|
"eval_calibration/coverage@25%": 0.09375,
|
|
"eval_calibration/coverage@30%": 0.09375,
|
|
"eval_calibration/coverage@5%": 0.0078125,
|
|
"eval_calibration/ece": 0.28908419881089126,
|
|
"eval_calibration/mean_confidence": 0.5618975390120369,
|
|
"eval_calibration/prompt_uniqueness": 0.9091796875,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 330.5,
|
|
"eval_completions/max_terminated_length": 330.5,
|
|
"eval_completions/mean_length": 145.39965057373047,
|
|
"eval_completions/mean_terminated_length": 145.39965057373047,
|
|
"eval_completions/min_length": 69.75,
|
|
"eval_completions/min_terminated_length": 69.75,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 166386072.0,
|
|
"eval_reward": 0.78005750477314,
|
|
"eval_reward_std": 0.24558523669838905,
|
|
"eval_rewards/accuracy_reward": 0.341796875,
|
|
"eval_rewards/brier_reward": 0.7065591365098953,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.90478515625,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.005420909612439573,
|
|
"eval_rewards/frontier_coverage_0": 0.13509072735905647,
|
|
"eval_rewards/frontier_coverage_1": 0.13509072735905647,
|
|
"eval_rewards/frontier_coverage_10": 0.13509072735905647,
|
|
"eval_rewards/frontier_coverage_15": 0.13509072735905647,
|
|
"eval_rewards/frontier_coverage_20": 0.13509072735905647,
|
|
"eval_rewards/frontier_coverage_25": 0.13509072735905647,
|
|
"eval_rewards/frontier_coverage_5": 0.13509072735905647,
|
|
"eval_rewards/frontier_ece_reward": -0.00845025188755244,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.6288299560546875,
|
|
"eval_runtime": 18.697,
|
|
"eval_samples_per_second": 26.742,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4388427734375,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4750789478421211,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.21942138671875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21942138671875,
|
|
"eval_signal/advantage_abs_mean": 0.2188284732401371,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2188284732401371,
|
|
"eval_signal/advantage_pre_scale_std": 0.24322915077209473,
|
|
"eval_signal/advantage_std": 0.24322915077209473,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2244720533490181,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.9375,
|
|
"eval_signal/brier_reward/group_std_mean": 0.27552034705877304,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022447205148637295,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.022447205148637295,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0378265380859375,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.359375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04345181304961443,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003782653948292136,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003782653948292136,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004264666116796434,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.8828125,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005977678927592933,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3308327551349066e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3308327551349066e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.24422482028603554,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.3201175183057785,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0030528103816322982,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030528103816322982,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.24422482028603554,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.3201175183057785,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030528103816322982,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030528103816322982,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.24422482028603554,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.3201175183057785,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030528103816322982,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030528103816322982,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.24422482028603554,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.3201175183057785,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030528103816322982,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030528103816322982,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.24422482028603554,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.3201175183057785,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030528103816322982,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030528103816322982,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.24422482028603554,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.3201175183057785,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030528103816322982,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030528103816322982,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.24422482028603554,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.3201175183057785,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030528103816322982,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030528103816322982,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.09522267617285252,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8671875,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.12967629730701447,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009522267850115895,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009522267850115895,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3184318542480469,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.3345780223608017,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03184318542480469,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03184318542480469,
|
|
"eval_steps_per_second": 0.214,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.42845597095272925,
|
|
"calibration/batch_distribution_entropy": 0.9743751131787548,
|
|
"calibration/batch_entropy_100bins": 0.9510459343045478,
|
|
"calibration/batch_entropy_10bins": 0.9743751131787548,
|
|
"calibration/batch_entropy_50bins": 0.9671359424561432,
|
|
"calibration/batch_uniqueness": 0.9603113397334159,
|
|
"calibration/buffer_distribution_entropy": 0.9585476127536652,
|
|
"calibration/buffer_entropy_100bins": 0.8791153575811542,
|
|
"calibration/buffer_entropy_10bins": 0.9585476127536652,
|
|
"calibration/buffer_entropy_50bins": 0.9184203673259261,
|
|
"calibration/confidence_entropy": 0.49344108216528165,
|
|
"calibration/coverage@0%": 0.00078125,
|
|
"calibration/coverage@1%": 0.00078125,
|
|
"calibration/coverage@10%": 0.00078125,
|
|
"calibration/coverage@15%": 0.00078125,
|
|
"calibration/coverage@20%": 0.00078125,
|
|
"calibration/coverage@25%": 0.00078125,
|
|
"calibration/coverage@30%": 0.06173938967710372,
|
|
"calibration/coverage@5%": 0.00078125,
|
|
"calibration/ece": 0.19620665052494485,
|
|
"calibration/mean_confidence": 0.5846989709145939,
|
|
"calibration/prompt_uniqueness": 0.8889120524681321,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 665.2,
|
|
"completions/max_terminated_length": 458.2,
|
|
"completions/mean_length": 146.85908203125,
|
|
"completions/mean_terminated_length": 146.58725280761718,
|
|
"completions/min_length": 54.4,
|
|
"completions/min_terminated_length": 54.4,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.0016376320272684097,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 183127029.0,
|
|
"reward": 0.8913762331008911,
|
|
"reward_std": 0.13524161875247956,
|
|
"rewards/accuracy_reward": 0.4572265625,
|
|
"rewards/brier_reward": 0.7286172389984131,
|
|
"rewards/confidence_uniqueness_reward": 0.9600118160247803,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.004195977700874209,
|
|
"rewards/frontier_coverage_0": 0.09039346724748612,
|
|
"rewards/frontier_coverage_1": 0.09039346724748612,
|
|
"rewards/frontier_coverage_10": 0.09039346724748612,
|
|
"rewards/frontier_coverage_15": 0.09039346724748612,
|
|
"rewards/frontier_coverage_20": 0.09039346724748612,
|
|
"rewards/frontier_coverage_25": 0.09039346724748612,
|
|
"rewards/frontier_coverage_5": 0.09039346724748612,
|
|
"rewards/frontier_ece_reward": 0.01440376602113247,
|
|
"rewards/frontier_entropy_batch_reward": -0.1510435476899147,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1582275390625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.194140625,
|
|
"signal/accuracy_reward/group_std_mean": 0.2026852160692215,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.446875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.07911376953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07911376953125,
|
|
"signal/advantage_abs_mean": 0.10643114447593689,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10643114447593689,
|
|
"signal/advantage_pre_scale_std": 0.1524705171585083,
|
|
"signal/advantage_std": 0.1524705171585083,
|
|
"signal/brier_reward/centered_abs_mean": 0.19858744144439697,
|
|
"signal/brier_reward/group_bin_occupancy": 0.90390625,
|
|
"signal/brier_reward/group_std_mean": 0.24619080722332,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019858743995428085,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019858743995428085,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013402053527534008,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.914453125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018923624232411385,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013402053853496909,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013402053853496909,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_bin_occupancy": 0.12734375,
|
|
"signal/format_reward/group_std_mean": 0.0033145629335194827,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029607733245939015,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.816796875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004161783494055271,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.700966844917275e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.700966844917275e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20848225355148314,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.898046875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.27132275700569153,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026060281787067653,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026060281787067653,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20848225355148314,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.898046875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27132275700569153,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026060281787067653,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026060281787067653,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20848225355148314,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.898046875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27132275700569153,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026060281787067653,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026060281787067653,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20848225355148314,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.898046875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27132275700569153,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026060281787067653,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026060281787067653,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20848225355148314,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.898046875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27132275700569153,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026060281787067653,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026060281787067653,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20848225355148314,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.898046875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27132275700569153,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026060281787067653,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026060281787067653,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20848225355148314,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.898046875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27132275700569153,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026060281787067653,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026060281787067653,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.07936635911464691,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.798828125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.10026746243238449,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007936635799705983,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007936635799705983,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.23007346987724303,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.815234375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.305187976360321,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02300734743475914,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02300734743475914,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.36150441430089725,
|
|
"calibration/batch_distribution_entropy": 0.9809884793671644,
|
|
"calibration/batch_entropy_100bins": 0.9553038792513879,
|
|
"calibration/batch_entropy_10bins": 0.9809884793671644,
|
|
"calibration/batch_entropy_50bins": 0.9725050248395928,
|
|
"calibration/batch_uniqueness": 0.9608878320253972,
|
|
"calibration/buffer_distribution_entropy": 0.962701946306086,
|
|
"calibration/buffer_entropy_100bins": 0.8976021302572151,
|
|
"calibration/buffer_entropy_10bins": 0.962701946306086,
|
|
"calibration/buffer_entropy_50bins": 0.931815885579519,
|
|
"calibration/confidence_entropy": 0.4694115267660267,
|
|
"calibration/coverage@0%": 0.003913894324853229,
|
|
"calibration/coverage@1%": 0.003913894324853229,
|
|
"calibration/coverage@10%": 0.003913894324853229,
|
|
"calibration/coverage@15%": 0.003913894324853229,
|
|
"calibration/coverage@20%": 0.06016848091976516,
|
|
"calibration/coverage@25%": 0.1422547700587084,
|
|
"calibration/coverage@30%": 0.36152687744618395,
|
|
"calibration/coverage@5%": 0.003913894324853229,
|
|
"calibration/ece": 0.13340543732164134,
|
|
"calibration/mean_confidence": 0.5382121094846762,
|
|
"calibration/prompt_uniqueness": 0.8838947974928459,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1094.8,
|
|
"completions/max_terminated_length": 448.8,
|
|
"completions/mean_length": 155.36318359375,
|
|
"completions/mean_terminated_length": 154.8236053466797,
|
|
"completions/min_length": 60.6,
|
|
"completions/min_terminated_length": 60.6,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.0012104158522561193,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0,
|
|
"num_tokens": 199532764.0,
|
|
"reward": 0.9095940351486206,
|
|
"reward_std": 0.12004156708717346,
|
|
"rewards/accuracy_reward": 0.48525390625,
|
|
"rewards/brier_reward": 0.7442206859588623,
|
|
"rewards/confidence_uniqueness_reward": 0.9607667922973633,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.003671001689508557,
|
|
"rewards/frontier_coverage_0": 0.10046138539910317,
|
|
"rewards/frontier_coverage_1": 0.10046138539910317,
|
|
"rewards/frontier_coverage_10": 0.10046138539910317,
|
|
"rewards/frontier_coverage_15": 0.10046138539910317,
|
|
"rewards/frontier_coverage_20": 0.10046138539910317,
|
|
"rewards/frontier_coverage_25": 0.10046138539910317,
|
|
"rewards/frontier_coverage_5": 0.10046138539910317,
|
|
"rewards/frontier_ece_reward": 0.02302660271525383,
|
|
"rewards/frontier_entropy_batch_reward": -0.1423702985048294,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.138128662109375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.18108512461185455,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0690643310546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0690643310546875,
|
|
"signal/advantage_abs_mean": 0.09277227818965912,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09277227818965912,
|
|
"signal/advantage_pre_scale_std": 0.13744349181652069,
|
|
"signal/advantage_std": 0.13744349181652069,
|
|
"signal/brier_reward/centered_abs_mean": 0.19675520658493043,
|
|
"signal/brier_reward/group_bin_occupancy": 0.88359375,
|
|
"signal/brier_reward/group_std_mean": 0.24640358686447145,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019675521552562712,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019675521552562712,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014549448899924756,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.853515625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.021208246052265168,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014549449319019915,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014549449319019915,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_bin_occupancy": 0.127734375,
|
|
"signal/format_reward/group_std_mean": 0.0038669900968670845,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026927752885967495,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.79921875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038467171136289833,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3659690961940215e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3659690961940215e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.22805612087249755,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.29446661472320557,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028507016133517025,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028507016133517025,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22805612087249755,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.29446661472320557,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028507016133517025,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028507016133517025,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22805612087249755,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.29446661472320557,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028507016133517025,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028507016133517025,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22805612087249755,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29446661472320557,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028507016133517025,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028507016133517025,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22805612087249755,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.29446661472320557,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028507016133517025,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028507016133517025,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22805612087249755,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.29446661472320557,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028507016133517025,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028507016133517025,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22805612087249755,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.29446661472320557,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028507016133517025,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028507016133517025,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.07045196145772933,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7609375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08983934074640273,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007045195996761322,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007045195996761322,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2271820455789566,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.756640625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3046163022518158,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02271820567548275,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02271820567548275,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30856681251726314,
|
|
"calibration/batch_distribution_entropy": 0.976034904740984,
|
|
"calibration/batch_entropy_100bins": 0.9546305905247134,
|
|
"calibration/batch_entropy_10bins": 0.976034904740984,
|
|
"calibration/batch_entropy_50bins": 0.9708003879239427,
|
|
"calibration/batch_uniqueness": 0.9573702970661492,
|
|
"calibration/buffer_distribution_entropy": 0.9666963516015349,
|
|
"calibration/buffer_entropy_100bins": 0.9110968940643943,
|
|
"calibration/buffer_entropy_10bins": 0.9666963516015349,
|
|
"calibration/buffer_entropy_50bins": 0.9410400607285831,
|
|
"calibration/confidence_entropy": 0.46580401448310244,
|
|
"calibration/coverage@0%": 0.0070343077299412915,
|
|
"calibration/coverage@1%": 0.0070343077299412915,
|
|
"calibration/coverage@10%": 0.09375305772994129,
|
|
"calibration/coverage@15%": 0.1668335677592955,
|
|
"calibration/coverage@20%": 0.3592664505870842,
|
|
"calibration/coverage@25%": 0.47539674045988256,
|
|
"calibration/coverage@30%": 0.5375703277886498,
|
|
"calibration/coverage@5%": 0.01797180772994129,
|
|
"calibration/ece": 0.17415111158550767,
|
|
"calibration/mean_confidence": 0.48247349070355083,
|
|
"calibration/prompt_uniqueness": 0.8831559735952134,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 864.6,
|
|
"completions/max_terminated_length": 429.2,
|
|
"completions/mean_length": 167.91220703125,
|
|
"completions/mean_terminated_length": 167.64508056640625,
|
|
"completions/min_length": 64.6,
|
|
"completions/min_terminated_length": 64.6,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.0012852454092353582,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 216284409.0,
|
|
"reward": 0.9297733783721924,
|
|
"reward_std": 0.11357135176658631,
|
|
"rewards/accuracy_reward": 0.521875,
|
|
"rewards/brier_reward": 0.7487919449806213,
|
|
"rewards/confidence_uniqueness_reward": 0.9588976621627807,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.003094815369695425,
|
|
"rewards/frontier_coverage_0": 0.08237677216529846,
|
|
"rewards/frontier_coverage_1": 0.08237677216529846,
|
|
"rewards/frontier_coverage_10": 0.08237677216529846,
|
|
"rewards/frontier_coverage_15": 0.08237677216529846,
|
|
"rewards/frontier_coverage_20": 0.08237677216529846,
|
|
"rewards/frontier_coverage_25": 0.08237677216529846,
|
|
"rewards/frontier_coverage_5": 0.08237677216529846,
|
|
"rewards/frontier_ece_reward": 0.025132818147540092,
|
|
"rewards/frontier_entropy_batch_reward": -0.11371518671512604,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14034423828125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.18984375,
|
|
"signal/accuracy_reward/group_std_mean": 0.18411757349967955,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.48125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.070172119140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.070172119140625,
|
|
"signal/advantage_abs_mean": 0.08810736685991287,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08810736685991287,
|
|
"signal/advantage_pre_scale_std": 0.1309487298130989,
|
|
"signal/advantage_std": 0.1309487298130989,
|
|
"signal/brier_reward/centered_abs_mean": 0.19533415138721466,
|
|
"signal/brier_reward/group_bin_occupancy": 0.87734375,
|
|
"signal/brier_reward/group_std_mean": 0.24378657042980195,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019533416256308556,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019533416256308556,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014175088331103324,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.910546875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.019649384170770647,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014175089076161385,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014175089076161385,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_bin_occupancy": 0.126953125,
|
|
"signal/format_reward/group_std_mean": 0.002762135770171881,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022600206080824136,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.785546875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0033125653862953186,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8250257309991865e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8250257309991865e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.25006471276283265,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.894140625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3173247754573822,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003125808946788311,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003125808946788311,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.25006471276283265,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.894140625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3173247754573822,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003125808946788311,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003125808946788311,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.25006471276283265,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.894140625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3173247754573822,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003125808946788311,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003125808946788311,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.25006471276283265,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.894140625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3173247754573822,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003125808946788311,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003125808946788311,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.25006471276283265,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.894140625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3173247754573822,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003125808946788311,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003125808946788311,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.25006471276283265,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.894140625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3173247754573822,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003125808946788311,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003125808946788311,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.25006471276283265,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.894140625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3173247754573822,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003125808946788311,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003125808946788311,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05986908376216889,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.729296875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07776331305503845,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0059869085438549515,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0059869085438549515,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1874374121427536,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.765234375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.25852798819541933,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018743741139769555,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018743741139769555,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.330596438844872,
|
|
"calibration/batch_distribution_entropy": 0.9800789256145354,
|
|
"calibration/batch_entropy_100bins": 0.9652221422853536,
|
|
"calibration/batch_entropy_10bins": 0.9800789256145354,
|
|
"calibration/batch_entropy_50bins": 0.9783522166237942,
|
|
"calibration/batch_uniqueness": 0.9553811352943624,
|
|
"calibration/buffer_distribution_entropy": 0.9722985901730118,
|
|
"calibration/buffer_entropy_100bins": 0.9226661190460972,
|
|
"calibration/buffer_entropy_10bins": 0.9722985901730118,
|
|
"calibration/buffer_entropy_50bins": 0.9495554922127216,
|
|
"calibration/confidence_entropy": 0.479133780716967,
|
|
"calibration/coverage@0%": 0.017207375244618395,
|
|
"calibration/coverage@1%": 0.017207375244618395,
|
|
"calibration/coverage@10%": 0.08995994373776908,
|
|
"calibration/coverage@15%": 0.1708827666340509,
|
|
"calibration/coverage@20%": 0.2517765410958904,
|
|
"calibration/coverage@25%": 0.38699394569471623,
|
|
"calibration/coverage@30%": 0.4792227250489237,
|
|
"calibration/coverage@5%": 0.019947101272015655,
|
|
"calibration/ece": 0.18975004661276867,
|
|
"calibration/mean_confidence": 0.4268005781223603,
|
|
"calibration/prompt_uniqueness": 0.879099580921566,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1016.2,
|
|
"completions/max_terminated_length": 609.6,
|
|
"completions/mean_length": 177.30166015625,
|
|
"completions/mean_terminated_length": 176.6397247314453,
|
|
"completions/min_length": 73.6,
|
|
"completions/min_terminated_length": 73.6,
|
|
"epoch": 0.224,
|
|
"grad_norm": 0.0010515432804822922,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 233253162.0,
|
|
"reward": 0.9137272119522095,
|
|
"reward_std": 0.10092450678348541,
|
|
"rewards/accuracy_reward": 0.4833984375,
|
|
"rewards/brier_reward": 0.7625031471252441,
|
|
"rewards/confidence_uniqueness_reward": 0.9558073043823242,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.0030704465694725513,
|
|
"rewards/frontier_coverage_0": 0.12124817669391633,
|
|
"rewards/frontier_coverage_1": 0.12124817669391633,
|
|
"rewards/frontier_coverage_10": 0.12124817669391633,
|
|
"rewards/frontier_coverage_15": 0.12124817669391633,
|
|
"rewards/frontier_coverage_20": 0.12124817669391633,
|
|
"rewards/frontier_coverage_25": 0.12124817669391633,
|
|
"rewards/frontier_coverage_5": 0.12124817669391633,
|
|
"rewards/frontier_ece_reward": 0.021660603955388068,
|
|
"rewards/frontier_entropy_batch_reward": -0.12246965020895004,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.11944580078125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.18359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.15991852879524232,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.53125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.059722900390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.059722900390625,
|
|
"signal/advantage_abs_mean": 0.07758828401565551,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07758828401565551,
|
|
"signal/advantage_pre_scale_std": 0.11865905672311783,
|
|
"signal/advantage_std": 0.11865905672311783,
|
|
"signal/brier_reward/centered_abs_mean": 0.17773639261722565,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8609375,
|
|
"signal/brier_reward/group_std_mean": 0.22495804727077484,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017773639410734177,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017773639410734177,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014823544770479202,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.925,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.020474844425916672,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001482354523614049,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001482354523614049,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_bin_occupancy": 0.12734375,
|
|
"signal/format_reward/group_std_mean": 0.0033145629800856113,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019347959896549582,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.779296875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0028501675464212895,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4184949143091217e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4184949143091217e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2396583765745163,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.9046875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3042136013507843,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002995729772374034,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002995729772374034,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2396583765745163,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.9046875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3042136013507843,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002995729772374034,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002995729772374034,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2396583765745163,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.9046875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3042136013507843,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002995729772374034,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002995729772374034,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2396583765745163,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.9046875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3042136013507843,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002995729772374034,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002995729772374034,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2396583765745163,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9046875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3042136013507843,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002995729772374034,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002995729772374034,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2396583765745163,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9046875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3042136013507843,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002995729772374034,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002995729772374034,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2396583765745163,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.9046875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3042136013507843,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002995729772374034,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002995729772374034,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.048828136175870895,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.698046875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06492637246847152,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004882813710719347,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004882813710719347,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18858475387096404,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76640625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2542591840028763,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01885847598314285,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01885847598314285,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.37536408814265015,
|
|
"calibration/batch_distribution_entropy": 0.9771937196120367,
|
|
"calibration/batch_entropy_100bins": 0.9607011820953872,
|
|
"calibration/batch_entropy_10bins": 0.9771937196120367,
|
|
"calibration/batch_entropy_50bins": 0.9735030092986381,
|
|
"calibration/batch_uniqueness": 0.9562491780901577,
|
|
"calibration/buffer_distribution_entropy": 0.9769076636131834,
|
|
"calibration/buffer_entropy_100bins": 0.9320570034740199,
|
|
"calibration/buffer_entropy_10bins": 0.9769076636131834,
|
|
"calibration/buffer_entropy_50bins": 0.9563579445536927,
|
|
"calibration/confidence_entropy": 0.5054295072946868,
|
|
"calibration/coverage@0%": 0.011331947162426614,
|
|
"calibration/coverage@1%": 0.011331947162426614,
|
|
"calibration/coverage@10%": 0.07734757216242662,
|
|
"calibration/coverage@15%": 0.1140663221624266,
|
|
"calibration/coverage@20%": 0.1898475721624266,
|
|
"calibration/coverage@25%": 0.2398475721624266,
|
|
"calibration/coverage@30%": 0.2921913221624266,
|
|
"calibration/coverage@5%": 0.04609757216242662,
|
|
"calibration/ece": 0.1579841463548282,
|
|
"calibration/mean_confidence": 0.48800210324158166,
|
|
"calibration/prompt_uniqueness": 0.884576765332336,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00068359375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 705.4,
|
|
"completions/mean_length": 184.08701171875,
|
|
"completions/mean_terminated_length": 183.16248779296876,
|
|
"completions/min_length": 73.4,
|
|
"completions/min_terminated_length": 73.4,
|
|
"epoch": 0.24,
|
|
"grad_norm": 0.0011616898700594902,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0023,
|
|
"num_tokens": 250389893.0,
|
|
"reward": 0.9384328126907349,
|
|
"reward_std": 0.11288020461797714,
|
|
"rewards/accuracy_reward": 0.5419921875,
|
|
"rewards/brier_reward": 0.7529522061347962,
|
|
"rewards/confidence_uniqueness_reward": 0.9568016529083252,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.002845893194898963,
|
|
"rewards/frontier_coverage_0": 0.06113246735185385,
|
|
"rewards/frontier_coverage_1": 0.06113246735185385,
|
|
"rewards/frontier_coverage_10": 0.06113246735185385,
|
|
"rewards/frontier_coverage_15": 0.06113246735185385,
|
|
"rewards/frontier_coverage_20": 0.06113246735185385,
|
|
"rewards/frontier_coverage_25": 0.06113246735185385,
|
|
"rewards/frontier_coverage_5": 0.06113246735185385,
|
|
"rewards/frontier_ece_reward": 0.022722626104950905,
|
|
"rewards/frontier_entropy_batch_reward": -0.10733838081359863,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14188232421875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.191796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.18681408166885377,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.465625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.070941162109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.070941162109375,
|
|
"signal/advantage_abs_mean": 0.08786697685718536,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08786697685718536,
|
|
"signal/advantage_pre_scale_std": 0.13164357095956802,
|
|
"signal/advantage_std": 0.13164357095956802,
|
|
"signal/brier_reward/centered_abs_mean": 0.18108824789524078,
|
|
"signal/brier_reward/group_bin_occupancy": 0.88046875,
|
|
"signal/brier_reward/group_std_mean": 0.22850363552570344,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018108825013041497,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018108825013041497,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01398250348865986,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.937109375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.019881158694624902,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013982503674924373,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013982503674924373,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_bin_occupancy": 0.128125,
|
|
"signal/format_reward/group_std_mean": 0.004419417306780815,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021438012598082425,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7859375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0031446309760212897,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.679751632967964e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.679751632967964e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.23010546565055848,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.90546875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.29437238574028013,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028763184323906898,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028763184323906898,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.23010546565055848,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.90546875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.29437238574028013,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028763184323906898,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028763184323906898,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23010546565055848,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.90546875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.29437238574028013,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028763184323906898,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028763184323906898,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.23010546565055848,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.90546875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.29437238574028013,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028763184323906898,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028763184323906898,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.23010546565055848,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.90546875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.29437238574028013,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028763184323906898,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028763184323906898,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.23010546565055848,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.90546875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.29437238574028013,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028763184323906898,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028763184323906898,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.23010546565055848,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.90546875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.29437238574028013,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028763184323906898,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028763184323906898,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04993258342146874,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.71328125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0665904238820076,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004993258509784937,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004993258509784937,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17947104573249817,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7984375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24450061917304994,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017947105318307878,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017947105318307878,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3088122502636872,
|
|
"calibration/batch_distribution_entropy": 0.9862230260996881,
|
|
"calibration/batch_entropy_100bins": 0.9645962804983934,
|
|
"calibration/batch_entropy_10bins": 0.9862230260996881,
|
|
"calibration/batch_entropy_50bins": 0.979222583210366,
|
|
"calibration/batch_uniqueness": 0.9582794189453125,
|
|
"calibration/buffer_distribution_entropy": 0.9794717446767558,
|
|
"calibration/buffer_entropy_100bins": 0.9393690519837218,
|
|
"calibration/buffer_entropy_10bins": 0.9794717446767558,
|
|
"calibration/buffer_entropy_50bins": 0.961310714439325,
|
|
"calibration/confidence_entropy": 0.48274468276614363,
|
|
"calibration/coverage@0%": 0.000390625,
|
|
"calibration/coverage@1%": 0.000390625,
|
|
"calibration/coverage@10%": 0.066015625,
|
|
"calibration/coverage@15%": 0.230859375,
|
|
"calibration/coverage@20%": 0.31796875,
|
|
"calibration/coverage@25%": 0.46640625,
|
|
"calibration/coverage@30%": 0.582421875,
|
|
"calibration/coverage@5%": 0.000390625,
|
|
"calibration/ece": 0.12116486342122168,
|
|
"calibration/mean_confidence": 0.512004733953818,
|
|
"calibration/prompt_uniqueness": 0.882861328125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 1173.8,
|
|
"completions/max_terminated_length": 689.0,
|
|
"completions/mean_length": 182.08017578125,
|
|
"completions/mean_terminated_length": 181.02371826171876,
|
|
"completions/min_length": 80.4,
|
|
"completions/min_terminated_length": 80.4,
|
|
"epoch": 0.256,
|
|
"grad_norm": 0.0011187122436240315,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 267309210.0,
|
|
"reward": 0.9286181807518006,
|
|
"reward_std": 0.1064249649643898,
|
|
"rewards/accuracy_reward": 0.513671875,
|
|
"rewards/brier_reward": 0.7663417935371399,
|
|
"rewards/confidence_uniqueness_reward": 0.9569314360618592,
|
|
"rewards/format_reward": 0.99912109375,
|
|
"rewards/frontier_aurc_reward": -0.0030292498413473368,
|
|
"rewards/frontier_coverage_0": 0.09447629451751709,
|
|
"rewards/frontier_coverage_1": 0.09447629451751709,
|
|
"rewards/frontier_coverage_10": 0.09447629451751709,
|
|
"rewards/frontier_coverage_15": 0.09447629451751709,
|
|
"rewards/frontier_coverage_20": 0.09447629451751709,
|
|
"rewards/frontier_coverage_25": 0.09447629451751709,
|
|
"rewards/frontier_coverage_5": 0.09447629451751709,
|
|
"rewards/frontier_ece_reward": 0.02433442622423172,
|
|
"rewards/frontier_entropy_batch_reward": -0.10767877101898193,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.13363037109375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.18359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.17056742310523987,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.53125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.066815185546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.066815185546875,
|
|
"signal/advantage_abs_mean": 0.08388981521129608,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08388981521129608,
|
|
"signal/advantage_pre_scale_std": 0.12852715700864792,
|
|
"signal/advantage_std": 0.12852715700864792,
|
|
"signal/brier_reward/centered_abs_mean": 0.1720704823732376,
|
|
"signal/brier_reward/group_bin_occupancy": 0.863671875,
|
|
"signal/brier_reward/group_std_mean": 0.21704732179641723,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01720704808831215,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01720704808831215,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013470648415386676,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.943359375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018156062439084054,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013470648787915706,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013470648787915706,
|
|
"signal/format_reward/centered_abs_mean": 0.001580810546875,
|
|
"signal/format_reward/group_bin_occupancy": 0.126953125,
|
|
"signal/format_reward/group_std_mean": 0.0033625275362282993,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007904052734375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007904052734375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024374906904995443,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.770703125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035979004576802255,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0468634213320912e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0468634213320912e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2125555694103241,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2719772934913635,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026569446548819543,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026569446548819543,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2125555694103241,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2719772934913635,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026569446548819543,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026569446548819543,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2125555694103241,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2719772934913635,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026569446548819543,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026569446548819543,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2125555694103241,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2719772934913635,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026569446548819543,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026569446548819543,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2125555694103241,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2719772934913635,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026569446548819543,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026569446548819543,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2125555694103241,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2719772934913635,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026569446548819543,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026569446548819543,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2125555694103241,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2719772934913635,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026569446548819543,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026569446548819543,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.050062181800603865,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.685546875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06556581407785415,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0050062181428074835,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0050062181428074835,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.175579434633255,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.78125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.23965271115303038,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017557943984866143,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017557943984866143,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3872883248462567,
|
|
"calibration/batch_distribution_entropy": 0.9911252411531031,
|
|
"calibration/batch_entropy_100bins": 0.9668056025762068,
|
|
"calibration/batch_entropy_10bins": 0.9911252411531031,
|
|
"calibration/batch_entropy_50bins": 0.9829774945880942,
|
|
"calibration/batch_uniqueness": 0.9590049221415742,
|
|
"calibration/buffer_distribution_entropy": 0.9814864706391244,
|
|
"calibration/buffer_entropy_100bins": 0.945556175624465,
|
|
"calibration/buffer_entropy_10bins": 0.9814864706391244,
|
|
"calibration/buffer_entropy_50bins": 0.9654573043868198,
|
|
"calibration/confidence_entropy": 0.4982663637951156,
|
|
"calibration/coverage@0%": 0.001171875,
|
|
"calibration/coverage@1%": 0.001171875,
|
|
"calibration/coverage@10%": 0.042723651960784315,
|
|
"calibration/coverage@15%": 0.11563265931372549,
|
|
"calibration/coverage@20%": 0.16890624999999998,
|
|
"calibration/coverage@25%": 0.2233057598039216,
|
|
"calibration/coverage@30%": 0.32767310049019605,
|
|
"calibration/coverage@5%": 0.001171875,
|
|
"calibration/ece": 0.15179049772902994,
|
|
"calibration/mean_confidence": 0.5230270996410378,
|
|
"calibration/prompt_uniqueness": 0.8800985243055555,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1133.8,
|
|
"completions/max_terminated_length": 661.6,
|
|
"completions/mean_length": 187.561328125,
|
|
"completions/mean_terminated_length": 187.0335693359375,
|
|
"completions/min_length": 82.2,
|
|
"completions/min_terminated_length": 82.2,
|
|
"epoch": 0.272,
|
|
"grad_norm": 0.0012320392997935414,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0014,
|
|
"num_tokens": 284195534.0,
|
|
"reward": 0.9237002849578857,
|
|
"reward_std": 0.10775048434734344,
|
|
"rewards/accuracy_reward": 0.5041015625,
|
|
"rewards/brier_reward": 0.7605077266693115,
|
|
"rewards/confidence_uniqueness_reward": 0.9581493377685547,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.0031376248225569725,
|
|
"rewards/frontier_coverage_0": 0.09240868501365185,
|
|
"rewards/frontier_coverage_1": 0.09240868501365185,
|
|
"rewards/frontier_coverage_10": 0.09240868501365185,
|
|
"rewards/frontier_coverage_15": 0.09240868501365185,
|
|
"rewards/frontier_coverage_20": 0.09240868501365185,
|
|
"rewards/frontier_coverage_25": 0.09240868501365185,
|
|
"rewards/frontier_coverage_5": 0.09240868501365185,
|
|
"rewards/frontier_ece_reward": 0.022019311785697937,
|
|
"rewards/frontier_entropy_batch_reward": -0.10122893005609512,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.13126220703125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.180859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.16641454994678498,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.553125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.065631103515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.065631103515625,
|
|
"signal/advantage_abs_mean": 0.08459821194410325,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08459821194410325,
|
|
"signal/advantage_pre_scale_std": 0.13007204085588456,
|
|
"signal/advantage_std": 0.13007204085588456,
|
|
"signal/brier_reward/centered_abs_mean": 0.16944461762905122,
|
|
"signal/brier_reward/group_bin_occupancy": 0.869140625,
|
|
"signal/brier_reward/group_std_mean": 0.2140252709388733,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016944462060928346,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016944462060928346,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012623942643404006,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.930078125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017593150585889818,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012623942689970135,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012623942689970135,
|
|
"signal/format_reward/centered_abs_mean": 0.001312255859375,
|
|
"signal/format_reward/group_bin_occupancy": 0.12734375,
|
|
"signal/format_reward/group_std_mean": 0.0035306816920638085,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002525777369737625,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.78125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0036961573641747236,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1572217631037346e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1572217631037346e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20187339186668396,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.893359375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2607047349214554,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002523417491465807,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002523417491465807,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20187339186668396,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.893359375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2607047349214554,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002523417491465807,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002523417491465807,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20187339186668396,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.893359375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2607047349214554,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002523417491465807,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002523417491465807,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20187339186668396,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.893359375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2607047349214554,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002523417491465807,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002523417491465807,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20187339186668396,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.893359375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2607047349214554,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002523417491465807,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002523417491465807,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20187339186668396,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.893359375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2607047349214554,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002523417491465807,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002523417491465807,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20187339186668396,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.893359375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2607047349214554,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002523417491465807,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002523417491465807,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04795216247439384,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.662109375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06269470900297165,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004795216396450997,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004795216396450997,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.16949324011802674,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.756640625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.23497817516326905,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01694932412356138,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01694932412356138,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34581066310791553,
|
|
"calibration/batch_distribution_entropy": 0.9819802009108018,
|
|
"calibration/batch_entropy_100bins": 0.9615195052648016,
|
|
"calibration/batch_entropy_10bins": 0.9819802009108018,
|
|
"calibration/batch_entropy_50bins": 0.9762919627953279,
|
|
"calibration/batch_uniqueness": 0.958841547683193,
|
|
"calibration/buffer_distribution_entropy": 0.9829452406186754,
|
|
"calibration/buffer_entropy_100bins": 0.9506370434085273,
|
|
"calibration/buffer_entropy_10bins": 0.9829452406186754,
|
|
"calibration/buffer_entropy_50bins": 0.9689288935812674,
|
|
"calibration/confidence_entropy": 0.4889890321665257,
|
|
"calibration/coverage@0%": 0.00390625,
|
|
"calibration/coverage@1%": 0.00390625,
|
|
"calibration/coverage@10%": 0.03678296232876712,
|
|
"calibration/coverage@15%": 0.0778780883072407,
|
|
"calibration/coverage@20%": 0.15996131971624267,
|
|
"calibration/coverage@25%": 0.20687912793542074,
|
|
"calibration/coverage@30%": 0.25455525318003913,
|
|
"calibration/coverage@5%": 0.012908206947162427,
|
|
"calibration/ece": 0.12166334605382478,
|
|
"calibration/mean_confidence": 0.5602370063816919,
|
|
"calibration/prompt_uniqueness": 0.88345570003577,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 867.0,
|
|
"completions/max_terminated_length": 444.6,
|
|
"completions/mean_length": 183.744921875,
|
|
"completions/mean_terminated_length": 183.48092651367188,
|
|
"completions/min_length": 83.0,
|
|
"completions/min_terminated_length": 83.0,
|
|
"epoch": 0.288,
|
|
"grad_norm": 0.001005663420073688,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 301035258.0,
|
|
"reward": 0.9296231985092163,
|
|
"reward_std": 0.10486756712198257,
|
|
"rewards/accuracy_reward": 0.51689453125,
|
|
"rewards/brier_reward": 0.7634823560714722,
|
|
"rewards/confidence_uniqueness_reward": 0.957963502407074,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0030765291303396225,
|
|
"rewards/frontier_coverage_0": 0.09391801804304123,
|
|
"rewards/frontier_coverage_1": 0.09391801804304123,
|
|
"rewards/frontier_coverage_10": 0.09391801804304123,
|
|
"rewards/frontier_coverage_15": 0.09391801804304123,
|
|
"rewards/frontier_coverage_20": 0.09391801804304123,
|
|
"rewards/frontier_coverage_25": 0.09391801804304123,
|
|
"rewards/frontier_coverage_5": 0.09391801804304123,
|
|
"rewards/frontier_ece_reward": 0.022438769787549974,
|
|
"rewards/frontier_entropy_batch_reward": -0.11196594089269638,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.128155517578125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.185546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.16995208263397216,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.515625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0640777587890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0640777587890625,
|
|
"signal/advantage_abs_mean": 0.08041936606168747,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08041936606168747,
|
|
"signal/advantage_pre_scale_std": 0.1254291296005249,
|
|
"signal/advantage_std": 0.1254291296005249,
|
|
"signal/brier_reward/centered_abs_mean": 0.1697759747505188,
|
|
"signal/brier_reward/group_bin_occupancy": 0.859765625,
|
|
"signal/brier_reward/group_std_mean": 0.21578840911388397,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016977597773075104,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016977597773075104,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012654472142457962,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.916015625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01737392246723175,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012654472608119248,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012654472608119248,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_bin_occupancy": 0.1265625,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002495748782530427,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.775390625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0036962830927222967,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.119685970887076e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.119685970887076e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2049511432647705,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.265233251452446,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00256188940256834,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00256188940256834,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2049511432647705,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.265233251452446,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00256188940256834,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00256188940256834,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2049511432647705,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.265233251452446,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00256188940256834,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00256188940256834,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2049511432647705,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.265233251452446,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00256188940256834,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00256188940256834,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2049511432647705,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.265233251452446,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00256188940256834,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00256188940256834,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2049511432647705,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.265233251452446,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00256188940256834,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00256188940256834,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2049511432647705,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.265233251452446,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00256188940256834,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00256188940256834,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04639850929379463,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.655859375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06019414514303208,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004639850929379463,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004639850929379463,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18500931262969972,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.766015625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2503536373376846,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018500932306051255,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018500932306051255,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30831422750676263,
|
|
"calibration/batch_distribution_entropy": 0.9839200839606193,
|
|
"calibration/batch_entropy_100bins": 0.9640602725866346,
|
|
"calibration/batch_entropy_10bins": 0.9839200839606193,
|
|
"calibration/batch_entropy_50bins": 0.9787751606670845,
|
|
"calibration/batch_uniqueness": 0.957394812760023,
|
|
"calibration/buffer_distribution_entropy": 0.9838445722781197,
|
|
"calibration/buffer_entropy_100bins": 0.9547516645514659,
|
|
"calibration/buffer_entropy_10bins": 0.9838445722781197,
|
|
"calibration/buffer_entropy_50bins": 0.9715224456315298,
|
|
"calibration/confidence_entropy": 0.4862570490637088,
|
|
"calibration/coverage@0%": 0.016037793542074363,
|
|
"calibration/coverage@1%": 0.016037793542074363,
|
|
"calibration/coverage@10%": 0.06649798189823874,
|
|
"calibration/coverage@15%": 0.14706152152641877,
|
|
"calibration/coverage@20%": 0.27959806139921717,
|
|
"calibration/coverage@25%": 0.36794046599804303,
|
|
"calibration/coverage@30%": 0.46173709637964777,
|
|
"calibration/coverage@5%": 0.025431139921722114,
|
|
"calibration/ece": 0.12583506352110047,
|
|
"calibration/mean_confidence": 0.5307190906026357,
|
|
"calibration/prompt_uniqueness": 0.8810963514568158,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1214.6,
|
|
"completions/max_terminated_length": 601.2,
|
|
"completions/mean_length": 192.29521484375,
|
|
"completions/mean_terminated_length": 191.76925048828124,
|
|
"completions/min_length": 85.2,
|
|
"completions/min_terminated_length": 85.2,
|
|
"epoch": 0.304,
|
|
"grad_norm": 0.0009384758886881173,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 317934313.0,
|
|
"reward": 0.9246511697769165,
|
|
"reward_std": 0.10153087228536606,
|
|
"rewards/accuracy_reward": 0.50927734375,
|
|
"rewards/brier_reward": 0.7537349700927735,
|
|
"rewards/confidence_uniqueness_reward": 0.9565217852592468,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.002995410794392228,
|
|
"rewards/frontier_coverage_0": 0.09300088435411454,
|
|
"rewards/frontier_coverage_1": 0.09300088435411454,
|
|
"rewards/frontier_coverage_10": 0.09300088435411454,
|
|
"rewards/frontier_coverage_15": 0.09300088435411454,
|
|
"rewards/frontier_coverage_20": 0.09300088435411454,
|
|
"rewards/frontier_coverage_25": 0.09300088435411454,
|
|
"rewards/frontier_coverage_5": 0.09300088435411454,
|
|
"rewards/frontier_ece_reward": 0.0181322168558836,
|
|
"rewards/frontier_entropy_batch_reward": -0.10633568167686462,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.124017333984375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.186328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1662678450345993,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.509375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0620086669921875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0620086669921875,
|
|
"signal/advantage_abs_mean": 0.07806494235992431,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07806494235992431,
|
|
"signal/advantage_pre_scale_std": 0.12007036358118058,
|
|
"signal/advantage_std": 0.12007036358118058,
|
|
"signal/brier_reward/centered_abs_mean": 0.17296849191188812,
|
|
"signal/brier_reward/group_bin_occupancy": 0.864453125,
|
|
"signal/brier_reward/group_std_mean": 0.21920109391212464,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017296848818659782,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017296848818659782,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012902907282114028,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.941015625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01767069585621357,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00129029075615108,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00129029075615108,
|
|
"signal/format_reward/centered_abs_mean": 0.001123046875,
|
|
"signal/format_reward/group_bin_occupancy": 0.126953125,
|
|
"signal/format_reward/group_std_mean": 0.0029782544821500777,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005615234375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0005615234375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022347769234329464,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.77109375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003333268640562892,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7934712124988437e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7934712124988437e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.22140364944934846,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2837002158164978,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002767545636743307,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002767545636743307,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22140364944934846,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2837002158164978,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002767545636743307,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002767545636743307,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22140364944934846,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2837002158164978,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002767545636743307,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002767545636743307,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22140364944934846,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2837002158164978,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002767545636743307,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002767545636743307,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22140364944934846,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2837002158164978,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002767545636743307,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002767545636743307,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22140364944934846,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2837002158164978,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002767545636743307,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002767545636743307,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22140364944934846,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2837002158164978,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002767545636743307,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002767545636743307,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.041382260620594025,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.633984375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05412525683641434,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004138226062059403,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004138226062059403,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1742929309606552,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.769921875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.23640851378440858,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017429293505847453,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017429293505847453,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2659389940843186,
|
|
"calibration/batch_distribution_entropy": 0.9876112772320713,
|
|
"calibration/batch_entropy_100bins": 0.9641565709786863,
|
|
"calibration/batch_entropy_10bins": 0.9876112772320713,
|
|
"calibration/batch_entropy_50bins": 0.9764263295564872,
|
|
"calibration/batch_uniqueness": 0.9552329008986323,
|
|
"calibration/buffer_distribution_entropy": 0.9848758337567493,
|
|
"calibration/buffer_entropy_100bins": 0.9586118774229,
|
|
"calibration/buffer_entropy_10bins": 0.9848758337567493,
|
|
"calibration/buffer_entropy_50bins": 0.9739535676958562,
|
|
"calibration/confidence_entropy": 0.48793486664465907,
|
|
"calibration/coverage@0%": 0.015629586594911937,
|
|
"calibration/coverage@1%": 0.015629586594911937,
|
|
"calibration/coverage@10%": 0.1556193431996086,
|
|
"calibration/coverage@15%": 0.35648009417808224,
|
|
"calibration/coverage@20%": 0.4373753975048924,
|
|
"calibration/coverage@25%": 0.5413389799412915,
|
|
"calibration/coverage@30%": 0.646870413405088,
|
|
"calibration/coverage@5%": 0.052348336594911934,
|
|
"calibration/ece": 0.14271041517983887,
|
|
"calibration/mean_confidence": 0.5320063723249747,
|
|
"calibration/prompt_uniqueness": 0.8741963937304892,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 931.0,
|
|
"completions/max_terminated_length": 538.0,
|
|
"completions/mean_length": 195.34619140625,
|
|
"completions/mean_terminated_length": 195.08486938476562,
|
|
"completions/min_length": 81.6,
|
|
"completions/min_terminated_length": 81.6,
|
|
"epoch": 0.32,
|
|
"grad_norm": 0.0009181920322589576,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0011,
|
|
"num_tokens": 335023362.0,
|
|
"reward": 0.9346740245819092,
|
|
"reward_std": 0.08925552219152451,
|
|
"rewards/accuracy_reward": 0.52470703125,
|
|
"rewards/brier_reward": 0.7718238949775695,
|
|
"rewards/confidence_uniqueness_reward": 0.9552286267280579,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0027100421022623776,
|
|
"rewards/frontier_coverage_0": 0.09961767829954624,
|
|
"rewards/frontier_coverage_1": 0.09961767829954624,
|
|
"rewards/frontier_coverage_10": 0.09961767829954624,
|
|
"rewards/frontier_coverage_15": 0.09961767829954624,
|
|
"rewards/frontier_coverage_20": 0.09961767829954624,
|
|
"rewards/frontier_coverage_25": 0.09961767829954624,
|
|
"rewards/frontier_coverage_5": 0.09961767829954624,
|
|
"rewards/frontier_ece_reward": 0.022007835283875465,
|
|
"rewards/frontier_entropy_batch_reward": -0.11024036258459091,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.096905517578125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1309303015470505,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0484527587890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0484527587890625,
|
|
"signal/advantage_abs_mean": 0.06786017194390297,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06786017194390297,
|
|
"signal/advantage_pre_scale_std": 0.10885472595691681,
|
|
"signal/advantage_std": 0.10885472595691681,
|
|
"signal/brier_reward/centered_abs_mean": 0.15993238091468812,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8578125,
|
|
"signal/brier_reward/group_std_mean": 0.20314022600650788,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015993238613009452,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015993238613009452,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013150414079427719,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93515625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018019300326704978,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001315041445195675,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001315041445195675,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_bin_occupancy": 0.126953125,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021082177059724926,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.777734375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003096911637112498,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6352722488809378e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6352722488809378e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1996733397245407,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.873046875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.25663221478462217,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00249591669999063,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00249591669999063,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1996733397245407,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.873046875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.25663221478462217,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00249591669999063,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00249591669999063,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1996733397245407,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.873046875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.25663221478462217,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00249591669999063,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00249591669999063,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1996733397245407,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.873046875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.25663221478462217,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00249591669999063,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00249591669999063,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1996733397245407,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.873046875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.25663221478462217,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00249591669999063,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00249591669999063,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1996733397245407,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.873046875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.25663221478462217,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00249591669999063,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00249591669999063,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1996733397245407,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.873046875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.25663221478462217,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00249591669999063,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00249591669999063,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.039423568546772,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.628515625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05080499574542045,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003942356910556555,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003942356910556555,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18164745569229127,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.77109375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24795118868350982,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018164745718240737,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018164745718240737,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"eval_calibration/aurc": 0.5011022568208787,
|
|
"eval_calibration/batch_distribution_entropy": 0.9195527723724205,
|
|
"eval_calibration/batch_entropy_100bins": 0.7037637102944632,
|
|
"eval_calibration/batch_entropy_10bins": 0.9195527723724205,
|
|
"eval_calibration/batch_entropy_50bins": 0.779983490066886,
|
|
"eval_calibration/batch_uniqueness": 0.8955078125,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9854447883057991,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9607979279881629,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9854447883057991,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9752806536193837,
|
|
"eval_calibration/confidence_entropy": 0.46313100587975253,
|
|
"eval_calibration/coverage@0%": 0.046875,
|
|
"eval_calibration/coverage@1%": 0.046875,
|
|
"eval_calibration/coverage@10%": 0.046875,
|
|
"eval_calibration/coverage@15%": 0.046875,
|
|
"eval_calibration/coverage@20%": 0.046875,
|
|
"eval_calibration/coverage@25%": 0.1015625,
|
|
"eval_calibration/coverage@30%": 0.1015625,
|
|
"eval_calibration/coverage@5%": 0.046875,
|
|
"eval_calibration/ece": 0.21962422290224884,
|
|
"eval_calibration/mean_confidence": 0.4459440781139095,
|
|
"eval_calibration/prompt_uniqueness": 0.8955078125,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 598.0,
|
|
"eval_completions/max_terminated_length": 598.0,
|
|
"eval_completions/mean_length": 205.20069885253906,
|
|
"eval_completions/mean_terminated_length": 205.20069885253906,
|
|
"eval_completions/min_length": 97.75,
|
|
"eval_completions/min_terminated_length": 97.75,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 335023362.0,
|
|
"eval_reward": 0.823313757777214,
|
|
"eval_reward_std": 0.230881467461586,
|
|
"eval_rewards/accuracy_reward": 0.404296875,
|
|
"eval_rewards/brier_reward": 0.7818417847156525,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.90087890625,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0032422358635812998,
|
|
"eval_rewards/frontier_coverage_0": 0.19046474620699883,
|
|
"eval_rewards/frontier_coverage_1": 0.19046474620699883,
|
|
"eval_rewards/frontier_coverage_10": 0.19046474620699883,
|
|
"eval_rewards/frontier_coverage_15": 0.19046474620699883,
|
|
"eval_rewards/frontier_coverage_20": 0.19046474620699883,
|
|
"eval_rewards/frontier_coverage_25": 0.19046474620699883,
|
|
"eval_rewards/frontier_coverage_5": 0.19046474620699883,
|
|
"eval_rewards/frontier_ece_reward": 0.015543812420219183,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.652862548828125,
|
|
"eval_runtime": 27.5363,
|
|
"eval_samples_per_second": 18.158,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4705810546875,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4924849271774292,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23529052734375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23529052734375,
|
|
"eval_signal/advantage_abs_mean": 0.21217802911996841,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21217802911996841,
|
|
"eval_signal/advantage_pre_scale_std": 0.2284625768661499,
|
|
"eval_signal/advantage_std": 0.2284625768661499,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.19572831690311432,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.8984375,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2521805912256241,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019572831690311432,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019572831690311432,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.037384033203125,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3828125,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.043207885697484016,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003738403378520161,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003738403378520161,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003047365229576826,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.765625,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004978827317245305,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.8092065551609267e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.8092065551609267e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.3660961836576462,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.4516659453511238,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004576202598400414,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004576202598400414,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3660961836576462,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4516659453511238,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004576202598400414,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004576202598400414,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3660961836576462,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4516659453511238,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004576202598400414,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004576202598400414,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3660961836576462,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4516659453511238,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004576202598400414,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004576202598400414,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3660961836576462,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.4516659453511238,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004576202598400414,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004576202598400414,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.3660961836576462,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.4516659453511238,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004576202598400414,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004576202598400414,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3660961836576462,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4516659453511238,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004576202598400414,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004576202598400414,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.037898930720984936,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.7265625,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.06024767179042101,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003789893235079944,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003789893235079944,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3284454345703125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.34034357219934464,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03284454345703125,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03284454345703125,
|
|
"eval_steps_per_second": 0.145,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3048095368177367,
|
|
"calibration/batch_distribution_entropy": 0.9827100885636348,
|
|
"calibration/batch_entropy_100bins": 0.9700066493969526,
|
|
"calibration/batch_entropy_10bins": 0.9827100885636348,
|
|
"calibration/batch_entropy_50bins": 0.9797538783318143,
|
|
"calibration/batch_uniqueness": 0.9539394591967032,
|
|
"calibration/buffer_distribution_entropy": 0.9874702323709037,
|
|
"calibration/buffer_entropy_100bins": 0.9646559344286517,
|
|
"calibration/buffer_entropy_10bins": 0.9874702323709037,
|
|
"calibration/buffer_entropy_50bins": 0.9780032413599857,
|
|
"calibration/confidence_entropy": 0.49428211357645263,
|
|
"calibration/coverage@0%": 0.003515625,
|
|
"calibration/coverage@1%": 0.003515625,
|
|
"calibration/coverage@10%": 0.010945144324853228,
|
|
"calibration/coverage@15%": 0.06413817881604697,
|
|
"calibration/coverage@20%": 0.13452941536203522,
|
|
"calibration/coverage@25%": 0.37636833414872795,
|
|
"calibration/coverage@30%": 0.5475178877201565,
|
|
"calibration/coverage@5%": 0.003515625,
|
|
"calibration/ece": 0.13704558129813732,
|
|
"calibration/mean_confidence": 0.4775055551271814,
|
|
"calibration/prompt_uniqueness": 0.8719490805476067,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 1152.0,
|
|
"completions/max_terminated_length": 562.4,
|
|
"completions/mean_length": 200.309765625,
|
|
"completions/mean_terminated_length": 199.526708984375,
|
|
"completions/min_length": 87.0,
|
|
"completions/min_terminated_length": 87.0,
|
|
"epoch": 0.336,
|
|
"grad_norm": 0.0009267533314414322,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0026,
|
|
"num_tokens": 351796966.0,
|
|
"reward": 0.9373140454292297,
|
|
"reward_std": 0.09476064741611481,
|
|
"rewards/accuracy_reward": 0.53466796875,
|
|
"rewards/brier_reward": 0.7662975668907166,
|
|
"rewards/confidence_uniqueness_reward": 0.9530789375305175,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.002560078864917159,
|
|
"rewards/frontier_coverage_0": 0.08570035807788372,
|
|
"rewards/frontier_coverage_1": 0.08570035807788372,
|
|
"rewards/frontier_coverage_10": 0.08570035807788372,
|
|
"rewards/frontier_coverage_15": 0.08570035807788372,
|
|
"rewards/frontier_coverage_20": 0.08570035807788372,
|
|
"rewards/frontier_coverage_25": 0.08570035807788372,
|
|
"rewards/frontier_coverage_5": 0.08570035807788372,
|
|
"rewards/frontier_ece_reward": 0.019346101209521294,
|
|
"rewards/frontier_entropy_batch_reward": -0.1096835508942604,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.114910888671875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.180078125,
|
|
"signal/accuracy_reward/group_std_mean": 0.15273061096668245,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.559375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0574554443359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0574554443359375,
|
|
"signal/advantage_abs_mean": 0.07300383895635605,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07300383895635605,
|
|
"signal/advantage_pre_scale_std": 0.11499524712562562,
|
|
"signal/advantage_std": 0.11499524712562562,
|
|
"signal/brier_reward/centered_abs_mean": 0.16318108439445494,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8609375,
|
|
"signal/brier_reward/group_std_mean": 0.20483859181404113,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01631810814142227,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01631810814142227,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013554375991225243,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.929296875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01912703476846218,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013554376550018788,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013554376550018788,
|
|
"signal/format_reward/centered_abs_mean": 0.00150146484375,
|
|
"signal/format_reward/group_bin_occupancy": 0.127734375,
|
|
"signal/format_reward/group_std_mean": 0.004083108808845282,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000750732421875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000750732421875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020001448690891268,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.75625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003017709869891405,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5001811809488573e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5001811809488573e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21160376965999603,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.26891090869903567,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00264504705555737,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00264504705555737,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21160376965999603,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26891090869903567,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00264504705555737,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00264504705555737,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21160376965999603,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26891090869903567,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00264504705555737,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00264504705555737,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21160376965999603,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26891090869903567,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00264504705555737,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00264504705555737,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21160376965999603,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.26891090869903567,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00264504705555737,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00264504705555737,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21160376965999603,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.26891090869903567,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00264504705555737,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00264504705555737,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21160376965999603,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.26891090869903567,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00264504705555737,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00264504705555737,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.034326578676700595,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.629296875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.045123565942049026,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003432658081874251,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003432658081874251,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1763071745634079,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.760546875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.23879291415214537,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01763071771711111,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01763071771711111,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3209504238614419,
|
|
"calibration/batch_distribution_entropy": 0.9667923872892714,
|
|
"calibration/batch_entropy_100bins": 0.9595862106455335,
|
|
"calibration/batch_entropy_10bins": 0.9667923872892714,
|
|
"calibration/batch_entropy_50bins": 0.9686096089602719,
|
|
"calibration/batch_uniqueness": 0.9500500726518638,
|
|
"calibration/buffer_distribution_entropy": 0.9931473031803522,
|
|
"calibration/buffer_entropy_100bins": 0.9763724503789746,
|
|
"calibration/buffer_entropy_10bins": 0.9931473031803522,
|
|
"calibration/buffer_entropy_50bins": 0.9862473564066343,
|
|
"calibration/confidence_entropy": 0.46706245514587097,
|
|
"calibration/coverage@0%": 0.025416648680019954,
|
|
"calibration/coverage@1%": 0.025416648680019954,
|
|
"calibration/coverage@10%": 0.13932512302866354,
|
|
"calibration/coverage@15%": 0.24454261456294848,
|
|
"calibration/coverage@20%": 0.33253733428302834,
|
|
"calibration/coverage@25%": 0.43029770598307815,
|
|
"calibration/coverage@30%": 0.5132089376846629,
|
|
"calibration/coverage@5%": 0.03520984720943172,
|
|
"calibration/ece": 0.12471999058892232,
|
|
"calibration/mean_confidence": 0.44264518386490936,
|
|
"calibration/prompt_uniqueness": 0.8607651026762486,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 977.8,
|
|
"completions/max_terminated_length": 576.2,
|
|
"completions/mean_length": 201.4564453125,
|
|
"completions/mean_terminated_length": 200.93560180664062,
|
|
"completions/min_length": 84.2,
|
|
"completions/min_terminated_length": 84.2,
|
|
"epoch": 0.352,
|
|
"grad_norm": 0.0009329073945991695,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 369120296.0,
|
|
"reward": 0.908801531791687,
|
|
"reward_std": 0.09598542004823685,
|
|
"rewards/accuracy_reward": 0.47099609375,
|
|
"rewards/brier_reward": 0.7735196352005005,
|
|
"rewards/confidence_uniqueness_reward": 0.9524645686149598,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.003027368104085326,
|
|
"rewards/frontier_coverage_0": 0.13684964329004287,
|
|
"rewards/frontier_coverage_1": 0.13684964329004287,
|
|
"rewards/frontier_coverage_10": 0.13684964329004287,
|
|
"rewards/frontier_coverage_15": 0.13684964329004287,
|
|
"rewards/frontier_coverage_20": 0.13684964329004287,
|
|
"rewards/frontier_coverage_25": 0.13684964329004287,
|
|
"rewards/frontier_coverage_5": 0.13684964329004287,
|
|
"rewards/frontier_ece_reward": 0.015464337170124054,
|
|
"rewards/frontier_entropy_batch_reward": -0.12436114549636841,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.117779541015625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.18046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.15502216517925263,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.55625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0588897705078125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0588897705078125,
|
|
"signal/advantage_abs_mean": 0.0737321838736534,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0737321838736534,
|
|
"signal/advantage_pre_scale_std": 0.11658578664064408,
|
|
"signal/advantage_std": 0.11658578664064408,
|
|
"signal/brier_reward/centered_abs_mean": 0.158657768368721,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85703125,
|
|
"signal/brier_reward/group_std_mean": 0.20199429094791413,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01586577631533146,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01586577631533146,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013269886001944542,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.933984375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018744326569139956,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001326988684013486,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001326988684013486,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_bin_occupancy": 0.127734375,
|
|
"signal/format_reward/group_std_mean": 0.003866990143433213,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023643441498279573,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.752734375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0036352206021547317,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9554302818723956e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9554302818723956e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21218776404857637,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.883984375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.268929660320282,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002652347134426236,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002652347134426236,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21218776404857637,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.883984375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.268929660320282,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002652347134426236,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002652347134426236,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21218776404857637,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.883984375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.268929660320282,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002652347134426236,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002652347134426236,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21218776404857637,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.883984375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.268929660320282,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002652347134426236,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002652347134426236,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21218776404857637,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.883984375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.268929660320282,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002652347134426236,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002652347134426236,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21218776404857637,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.883984375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.268929660320282,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002652347134426236,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002652347134426236,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21218776404857637,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.883984375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.268929660320282,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002652347134426236,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002652347134426236,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02997433766722679,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.63515625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03954430893063545,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0029974338132888077,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0029974338132888077,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1805424988269806,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.762109375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24203293323516845,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01805424988269806,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01805424988269806,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3790243716711346,
|
|
"calibration/batch_distribution_entropy": 0.982709811496667,
|
|
"calibration/batch_entropy_100bins": 0.9717901920414483,
|
|
"calibration/batch_entropy_10bins": 0.982709811496667,
|
|
"calibration/batch_entropy_50bins": 0.980121350130229,
|
|
"calibration/batch_uniqueness": 0.954217529296875,
|
|
"calibration/buffer_distribution_entropy": 0.9971913463073884,
|
|
"calibration/buffer_entropy_100bins": 0.9857669408440536,
|
|
"calibration/buffer_entropy_10bins": 0.9971913463073884,
|
|
"calibration/buffer_entropy_50bins": 0.9926144523426681,
|
|
"calibration/confidence_entropy": 0.4853503677467438,
|
|
"calibration/coverage@0%": 0.003125,
|
|
"calibration/coverage@1%": 0.003125,
|
|
"calibration/coverage@10%": 0.024609375,
|
|
"calibration/coverage@15%": 0.046875,
|
|
"calibration/coverage@20%": 0.2328125,
|
|
"calibration/coverage@25%": 0.32734375,
|
|
"calibration/coverage@30%": 0.419921875,
|
|
"calibration/coverage@5%": 0.003125,
|
|
"calibration/ece": 0.1385865561118635,
|
|
"calibration/mean_confidence": 0.5192355450283547,
|
|
"calibration/prompt_uniqueness": 0.870849609375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 1157.8,
|
|
"completions/max_terminated_length": 715.4,
|
|
"completions/mean_length": 198.96796875,
|
|
"completions/mean_terminated_length": 198.1828582763672,
|
|
"completions/min_length": 91.0,
|
|
"completions/min_terminated_length": 91.0,
|
|
"epoch": 0.368,
|
|
"grad_norm": 0.0008175044204108417,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 386223200.0,
|
|
"reward": 0.9230035901069641,
|
|
"reward_std": 0.08821378499269486,
|
|
"rewards/accuracy_reward": 0.49833984375,
|
|
"rewards/brier_reward": 0.7744688391685486,
|
|
"rewards/confidence_uniqueness_reward": 0.9531052112579346,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.0032609821762889625,
|
|
"rewards/frontier_coverage_0": 0.11651851236820221,
|
|
"rewards/frontier_coverage_1": 0.11651851236820221,
|
|
"rewards/frontier_coverage_10": 0.11651851236820221,
|
|
"rewards/frontier_coverage_15": 0.11651851236820221,
|
|
"rewards/frontier_coverage_20": 0.11651851236820221,
|
|
"rewards/frontier_coverage_25": 0.11651851236820221,
|
|
"rewards/frontier_coverage_5": 0.11651851236820221,
|
|
"rewards/frontier_ece_reward": 0.014127342030405998,
|
|
"rewards/frontier_entropy_batch_reward": -0.10198113471269607,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.100238037109375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.175390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.13502895534038545,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0501190185546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0501190185546875,
|
|
"signal/advantage_abs_mean": 0.06722283586859704,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06722283586859704,
|
|
"signal/advantage_pre_scale_std": 0.11006049364805222,
|
|
"signal/advantage_std": 0.11006049364805222,
|
|
"signal/brier_reward/centered_abs_mean": 0.1503296136856079,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85,
|
|
"signal/brier_reward/group_std_mean": 0.19333814978599548,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015032961405813694,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015032961405813694,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012458873353898526,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.953125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016741343960165977,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001245887391269207,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001245887391269207,
|
|
"signal/format_reward/centered_abs_mean": 0.0010986328125,
|
|
"signal/format_reward/group_bin_occupancy": 0.1265625,
|
|
"signal/format_reward/group_std_mean": 0.0025827332865446806,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00054931640625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00054931640625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028565511573106052,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.74765625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004412284214049577,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.570688932086341e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.570688932086341e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1842558950185776,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.87578125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23739778399467468,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023031987249851225,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023031987249851225,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1842558950185776,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.87578125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23739778399467468,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023031987249851225,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023031987249851225,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1842558950185776,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.87578125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23739778399467468,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023031987249851225,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023031987249851225,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1842558950185776,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.87578125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23739778399467468,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023031987249851225,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023031987249851225,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1842558950185776,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.87578125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23739778399467468,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023031987249851225,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023031987249851225,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1842558950185776,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.87578125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23739778399467468,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023031987249851225,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023031987249851225,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1842558950185776,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.87578125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23739778399467468,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023031987249851225,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023031987249851225,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02585429698228836,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.67109375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03366940915584564,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0025854297447949646,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0025854297447949646,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1619349092245102,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.804296875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.21937708854675292,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.016193491220474244,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.016193491220474244,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3183561455024736,
|
|
"calibration/batch_distribution_entropy": 0.9761403104003881,
|
|
"calibration/batch_entropy_100bins": 0.9665729483264448,
|
|
"calibration/batch_entropy_10bins": 0.9761403104003881,
|
|
"calibration/batch_entropy_50bins": 0.9747901306945629,
|
|
"calibration/batch_uniqueness": 0.9519656303020192,
|
|
"calibration/buffer_distribution_entropy": 0.9990661585840115,
|
|
"calibration/buffer_entropy_100bins": 0.9913789451496673,
|
|
"calibration/buffer_entropy_10bins": 0.9990661585840115,
|
|
"calibration/buffer_entropy_50bins": 0.9961616782652717,
|
|
"calibration/confidence_entropy": 0.4738653115445815,
|
|
"calibration/coverage@0%": 0.01641236545988258,
|
|
"calibration/coverage@1%": 0.01641236545988258,
|
|
"calibration/coverage@10%": 0.22210815190802347,
|
|
"calibration/coverage@15%": 0.278392551369863,
|
|
"calibration/coverage@20%": 0.31787396037182,
|
|
"calibration/coverage@25%": 0.3725928021037182,
|
|
"calibration/coverage@30%": 0.43044046599804303,
|
|
"calibration/coverage@5%": 0.09390594422700586,
|
|
"calibration/ece": 0.14317070799732728,
|
|
"calibration/mean_confidence": 0.4663198053679688,
|
|
"calibration/prompt_uniqueness": 0.8607313074488381,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00068359375,
|
|
"completions/max_length": 1210.2,
|
|
"completions/max_terminated_length": 720.4,
|
|
"completions/mean_length": 199.19677734375,
|
|
"completions/mean_terminated_length": 198.28481750488282,
|
|
"completions/min_length": 85.0,
|
|
"completions/min_terminated_length": 85.0,
|
|
"epoch": 0.384,
|
|
"grad_norm": 0.0007297234842553735,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0017,
|
|
"num_tokens": 403119487.0,
|
|
"reward": 0.9370161175727845,
|
|
"reward_std": 0.09072954654693603,
|
|
"rewards/accuracy_reward": 0.52900390625,
|
|
"rewards/brier_reward": 0.7911636233329773,
|
|
"rewards/confidence_uniqueness_reward": 0.9525709033012391,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.002813473041169345,
|
|
"rewards/frontier_coverage_0": 0.11049409657716751,
|
|
"rewards/frontier_coverage_1": 0.11049409657716751,
|
|
"rewards/frontier_coverage_10": 0.11049409657716751,
|
|
"rewards/frontier_coverage_15": 0.11049409657716751,
|
|
"rewards/frontier_coverage_20": 0.11049409657716751,
|
|
"rewards/frontier_coverage_25": 0.11049409657716751,
|
|
"rewards/frontier_coverage_5": 0.11049409657716751,
|
|
"rewards/frontier_ece_reward": 0.013869478553533553,
|
|
"rewards/frontier_entropy_batch_reward": -0.12488683462142944,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.104974365234375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.14191071838140487,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0524871826171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0524871826171875,
|
|
"signal/advantage_abs_mean": 0.06853913813829422,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06853913813829422,
|
|
"signal/advantage_pre_scale_std": 0.11065925359725952,
|
|
"signal/advantage_std": 0.11065925359725952,
|
|
"signal/brier_reward/centered_abs_mean": 0.14036066234111785,
|
|
"signal/brier_reward/group_bin_occupancy": 0.849609375,
|
|
"signal/brier_reward/group_std_mean": 0.18114359974861144,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014036066457629203,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014036066457629203,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013375256024301053,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.95078125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018456452712416648,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013375255977734924,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013375255977734924,
|
|
"signal/format_reward/centered_abs_mean": 0.0014892578125,
|
|
"signal/format_reward/group_bin_occupancy": 0.12734375,
|
|
"signal/format_reward/group_std_mean": 0.0037468004506081344,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00074462890625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00074462890625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028811234049499037,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.734765625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004553025308996439,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6014043507748285e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6014043507748285e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18208499848842621,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2352500468492508,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022760625462979077,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022760625462979077,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18208499848842621,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2352500468492508,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022760625462979077,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022760625462979077,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18208499848842621,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2352500468492508,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022760625462979077,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022760625462979077,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18208499848842621,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2352500468492508,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022760625462979077,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022760625462979077,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18208499848842621,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2352500468492508,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022760625462979077,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022760625462979077,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18208499848842621,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2352500468492508,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022760625462979077,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022760625462979077,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18208499848842621,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2352500468492508,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022760625462979077,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022760625462979077,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02035584971308708,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.685546875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.026301588490605356,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020355849992483854,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020355849992483854,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1847107857465744,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.79296875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24139588475227355,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0184710793197155,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0184710793197155,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4323062871824819,
|
|
"calibration/batch_distribution_entropy": 0.9845003654379166,
|
|
"calibration/batch_entropy_100bins": 0.9709600544841221,
|
|
"calibration/batch_entropy_10bins": 0.9845003654379166,
|
|
"calibration/batch_entropy_50bins": 0.9813723090983913,
|
|
"calibration/batch_uniqueness": 0.9541264325075541,
|
|
"calibration/buffer_distribution_entropy": 0.9991648742985866,
|
|
"calibration/buffer_entropy_100bins": 0.9930824204994557,
|
|
"calibration/buffer_entropy_10bins": 0.9991648742985866,
|
|
"calibration/buffer_entropy_50bins": 0.9971470754608687,
|
|
"calibration/confidence_entropy": 0.49521664288064526,
|
|
"calibration/coverage@0%": 0.001171875,
|
|
"calibration/coverage@1%": 0.001171875,
|
|
"calibration/coverage@10%": 0.001171875,
|
|
"calibration/coverage@15%": 0.001171875,
|
|
"calibration/coverage@20%": 0.013671875,
|
|
"calibration/coverage@25%": 0.058984375,
|
|
"calibration/coverage@30%": 0.15415134803921568,
|
|
"calibration/coverage@5%": 0.001171875,
|
|
"calibration/ece": 0.15455243215267175,
|
|
"calibration/mean_confidence": 0.5068435462847125,
|
|
"calibration/prompt_uniqueness": 0.8713734809027777,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1140.8,
|
|
"completions/max_terminated_length": 579.8,
|
|
"completions/mean_length": 196.72412109375,
|
|
"completions/mean_terminated_length": 196.0713348388672,
|
|
"completions/min_length": 91.8,
|
|
"completions/min_terminated_length": 91.8,
|
|
"epoch": 0.4,
|
|
"grad_norm": 0.0010661915875971317,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 420170390.0,
|
|
"reward": 0.9237029790878296,
|
|
"reward_std": 0.10037501901388168,
|
|
"rewards/accuracy_reward": 0.51015625,
|
|
"rewards/brier_reward": 0.7678788423538208,
|
|
"rewards/confidence_uniqueness_reward": 0.9519475817680358,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.0038750086445361377,
|
|
"rewards/frontier_coverage_0": 0.10178494267165661,
|
|
"rewards/frontier_coverage_1": 0.10178494267165661,
|
|
"rewards/frontier_coverage_10": 0.10178494267165661,
|
|
"rewards/frontier_coverage_15": 0.10178494267165661,
|
|
"rewards/frontier_coverage_20": 0.10178494267165661,
|
|
"rewards/frontier_coverage_25": 0.10178494267165661,
|
|
"rewards/frontier_coverage_5": 0.10178494267165661,
|
|
"rewards/frontier_ece_reward": 0.009860872942954301,
|
|
"rewards/frontier_entropy_batch_reward": -0.12810986787080764,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12548828125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.18203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.16264511346817018,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.54375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062744140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.062744140625,
|
|
"signal/advantage_abs_mean": 0.07850312739610672,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07850312739610672,
|
|
"signal/advantage_pre_scale_std": 0.12315381318330765,
|
|
"signal/advantage_std": 0.12315381318330765,
|
|
"signal/brier_reward/centered_abs_mean": 0.1579117149114609,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8765625,
|
|
"signal/brier_reward/group_std_mean": 0.19992058277130126,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015791171602904795,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015791171602904795,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013566328212618828,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.938671875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.019081654772162438,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001356632891111076,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001356632891111076,
|
|
"signal/format_reward/centered_abs_mean": 0.00150146484375,
|
|
"signal/format_reward/group_bin_occupancy": 0.127734375,
|
|
"signal/format_reward/group_std_mean": 0.0040831089485436674,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000750732421875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000750732421875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.004184631397947669,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.715234375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.006817103549838066,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.230789611232467e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.230789611232467e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1897767126560211,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.87734375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24595032632350922,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00237220898270607,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00237220898270607,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1897767126560211,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.87734375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24595032632350922,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00237220898270607,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00237220898270607,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1897767126560211,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.87734375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24595032632350922,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00237220898270607,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00237220898270607,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1897767126560211,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.87734375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24595032632350922,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00237220898270607,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00237220898270607,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1897767126560211,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.87734375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24595032632350922,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00237220898270607,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00237220898270607,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1897767126560211,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.87734375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24595032632350922,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00237220898270607,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00237220898270607,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1897767126560211,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.87734375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24595032632350922,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00237220898270607,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00237220898270607,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.018028103187680243,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.69921875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.023144710436463357,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0018028103280812503,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0018028103280812503,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18568139374256135,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.759375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24778930544853212,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018568138778209686,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018568138778209686,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.32245475702575754,
|
|
"calibration/batch_distribution_entropy": 0.978982327771489,
|
|
"calibration/batch_entropy_100bins": 0.9670318034930006,
|
|
"calibration/batch_entropy_10bins": 0.978982327771489,
|
|
"calibration/batch_entropy_50bins": 0.9769518685366212,
|
|
"calibration/batch_uniqueness": 0.9534027099609375,
|
|
"calibration/buffer_distribution_entropy": 0.999017226336244,
|
|
"calibration/buffer_entropy_100bins": 0.9933569492297052,
|
|
"calibration/buffer_entropy_10bins": 0.999017226336244,
|
|
"calibration/buffer_entropy_50bins": 0.997349282060987,
|
|
"calibration/confidence_entropy": 0.5102350146041745,
|
|
"calibration/coverage@0%": 0.006640625,
|
|
"calibration/coverage@1%": 0.006640625,
|
|
"calibration/coverage@10%": 0.022265625,
|
|
"calibration/coverage@15%": 0.065625,
|
|
"calibration/coverage@20%": 0.127734375,
|
|
"calibration/coverage@25%": 0.27734375,
|
|
"calibration/coverage@30%": 0.471484375,
|
|
"calibration/coverage@5%": 0.014453125,
|
|
"calibration/ece": 0.10677393080062363,
|
|
"calibration/mean_confidence": 0.51815231260255,
|
|
"calibration/prompt_uniqueness": 0.870751953125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 960.8,
|
|
"completions/max_terminated_length": 752.4,
|
|
"completions/mean_length": 198.9875,
|
|
"completions/mean_terminated_length": 198.72580261230468,
|
|
"completions/min_length": 92.6,
|
|
"completions/min_terminated_length": 92.6,
|
|
"epoch": 0.416,
|
|
"grad_norm": 0.0009462664602324367,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0011,
|
|
"num_tokens": 437089206.0,
|
|
"reward": 0.9285161018371582,
|
|
"reward_std": 0.09302979856729507,
|
|
"rewards/accuracy_reward": 0.51337890625,
|
|
"rewards/brier_reward": 0.7765425562858581,
|
|
"rewards/confidence_uniqueness_reward": 0.953677773475647,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0032080319710075856,
|
|
"rewards/frontier_coverage_0": 0.10404052138328553,
|
|
"rewards/frontier_coverage_1": 0.10404052138328553,
|
|
"rewards/frontier_coverage_10": 0.10404052138328553,
|
|
"rewards/frontier_coverage_15": 0.10404052138328553,
|
|
"rewards/frontier_coverage_20": 0.10404052138328553,
|
|
"rewards/frontier_coverage_25": 0.10326478481292725,
|
|
"rewards/frontier_coverage_5": 0.10404052138328553,
|
|
"rewards/frontier_ece_reward": 0.009166896902024746,
|
|
"rewards/frontier_entropy_batch_reward": -0.11019333750009537,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.115802001953125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.151393261551857,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.56875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0579010009765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0579010009765625,
|
|
"signal/advantage_abs_mean": 0.07270590215921402,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07270590215921402,
|
|
"signal/advantage_pre_scale_std": 0.1149192675948143,
|
|
"signal/advantage_std": 0.1149192675948143,
|
|
"signal/brier_reward/centered_abs_mean": 0.1494935095310211,
|
|
"signal/brier_reward/group_bin_occupancy": 0.863671875,
|
|
"signal/brier_reward/group_std_mean": 0.18933481872081756,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01494935117661953,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01494935117661953,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012433665059506892,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.948828125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016522933915257453,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001243366557173431,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001243366557173431,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003389076329767704,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.70703125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005695812962949276,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2363452666904775e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2363452666904775e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19482134580612182,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24719403684139252,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024352668784558772,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024352668784558772,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19482134580612182,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24719403684139252,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024352668784558772,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024352668784558772,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19482134580612182,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24719403684139252,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024352668784558772,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024352668784558772,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19482134580612182,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24719403684139252,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024352668784558772,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024352668784558772,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19482134580612182,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24719403684139252,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024352668784558772,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024352668784558772,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19220769107341767,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.885546875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2439906269311905,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024025961756706238,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024025961756706238,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19482134580612182,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24719403684139252,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024352668784558772,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024352668784558772,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.015177857503294945,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.70546875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.019421032071113585,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015177857596427203,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015177857596427203,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17526901960372926,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.784765625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2358124166727066,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017526903189718723,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017526903189718723,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27758967566370596,
|
|
"calibration/batch_distribution_entropy": 0.9851777728674534,
|
|
"calibration/batch_entropy_100bins": 0.9700682358713884,
|
|
"calibration/batch_entropy_10bins": 0.9851777728674534,
|
|
"calibration/batch_entropy_50bins": 0.9804977790517506,
|
|
"calibration/batch_uniqueness": 0.9554472278853534,
|
|
"calibration/buffer_distribution_entropy": 0.9991109222945178,
|
|
"calibration/buffer_entropy_100bins": 0.9936594928536131,
|
|
"calibration/buffer_entropy_10bins": 0.9991109222945178,
|
|
"calibration/buffer_entropy_50bins": 0.9975580426679944,
|
|
"calibration/confidence_entropy": 0.4774470200653253,
|
|
"calibration/coverage@0%": 0.003912365459882583,
|
|
"calibration/coverage@1%": 0.003912365459882583,
|
|
"calibration/coverage@10%": 0.07587756849315068,
|
|
"calibration/coverage@15%": 0.16028620352250486,
|
|
"calibration/coverage@20%": 0.3328346685420744,
|
|
"calibration/coverage@25%": 0.4513515166340508,
|
|
"calibration/coverage@30%": 0.5909407106164384,
|
|
"calibration/coverage@5%": 0.003912365459882583,
|
|
"calibration/ece": 0.10711158925252093,
|
|
"calibration/mean_confidence": 0.5236740097988279,
|
|
"calibration/prompt_uniqueness": 0.8622971676638918,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 750.4,
|
|
"completions/max_terminated_length": 555.0,
|
|
"completions/mean_length": 195.87373046875,
|
|
"completions/mean_terminated_length": 195.74309997558595,
|
|
"completions/min_length": 94.8,
|
|
"completions/min_terminated_length": 94.8,
|
|
"epoch": 0.432,
|
|
"grad_norm": 0.0009695117478258908,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 454109289.0,
|
|
"reward": 0.9475380301475524,
|
|
"reward_std": 0.08842353671789169,
|
|
"rewards/accuracy_reward": 0.54765625,
|
|
"rewards/brier_reward": 0.7932500720024109,
|
|
"rewards/confidence_uniqueness_reward": 0.9542429208755493,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0026097355876117945,
|
|
"rewards/frontier_coverage_0": 0.10053790956735612,
|
|
"rewards/frontier_coverage_1": 0.10053790956735612,
|
|
"rewards/frontier_coverage_10": 0.10053790956735612,
|
|
"rewards/frontier_coverage_15": 0.10053790956735612,
|
|
"rewards/frontier_coverage_20": 0.10053790956735612,
|
|
"rewards/frontier_coverage_25": 0.0982507586479187,
|
|
"rewards/frontier_coverage_5": 0.10053790956735612,
|
|
"rewards/frontier_ece_reward": 0.009860789589583873,
|
|
"rewards/frontier_entropy_batch_reward": -0.10614849850535393,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.11329345703125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.176953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.14613474607467652,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.584375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.056646728515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.056646728515625,
|
|
"signal/advantage_abs_mean": 0.06929384395480156,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06929384395480156,
|
|
"signal/advantage_pre_scale_std": 0.11137249916791916,
|
|
"signal/advantage_std": 0.11137249916791916,
|
|
"signal/brier_reward/centered_abs_mean": 0.1398220479488373,
|
|
"signal/brier_reward/group_bin_occupancy": 0.830078125,
|
|
"signal/brier_reward/group_std_mean": 0.17961786389350892,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013982205092906952,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013982205092906952,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011601312272250652,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.953515625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015428530983626842,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001160131278447807,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001160131278447807,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003001504810526967,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.684375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005159206409007311,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.751881013158709e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.751881013158709e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18813469707965852,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.882421875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23900977075099944,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023516837507486345,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023516837507486345,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18813469707965852,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.882421875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23900977075099944,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023516837507486345,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023516837507486345,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18813469707965852,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.882421875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23900977075099944,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023516837507486345,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023516837507486345,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18813469707965852,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.882421875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23900977075099944,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023516837507486345,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023516837507486345,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18813469707965852,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.882421875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23900977075099944,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023516837507486345,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023516837507486345,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18071556687355042,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22983711957931519,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022589446045458315,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022589446045458315,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18813469707965852,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.882421875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23900977075099944,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023516837507486345,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023516837507486345,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.013565455190837383,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.702734375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01733413077890873,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013565455097705125,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013565455097705125,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1644774168729782,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.791015625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.21864676475524902,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.016447741910815238,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.016447741910815238,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3032347508308273,
|
|
"calibration/batch_distribution_entropy": 0.9762598969677517,
|
|
"calibration/batch_entropy_100bins": 0.9644103028515179,
|
|
"calibration/batch_entropy_10bins": 0.9762598969677517,
|
|
"calibration/batch_entropy_50bins": 0.9749709313958501,
|
|
"calibration/batch_uniqueness": 0.953617455405724,
|
|
"calibration/buffer_distribution_entropy": 0.9986577848398988,
|
|
"calibration/buffer_entropy_100bins": 0.993577532222066,
|
|
"calibration/buffer_entropy_10bins": 0.9986577848398988,
|
|
"calibration/buffer_entropy_50bins": 0.9973486915751801,
|
|
"calibration/confidence_entropy": 0.49841096598646917,
|
|
"calibration/coverage@0%": 0.007428754892367906,
|
|
"calibration/coverage@1%": 0.007428754892367906,
|
|
"calibration/coverage@10%": 0.07159215998043053,
|
|
"calibration/coverage@15%": 0.145873593444227,
|
|
"calibration/coverage@20%": 0.18615230552837575,
|
|
"calibration/coverage@25%": 0.31355109466731895,
|
|
"calibration/coverage@30%": 0.4327329990215264,
|
|
"calibration/coverage@5%": 0.007428754892367906,
|
|
"calibration/ece": 0.12669799261164544,
|
|
"calibration/mean_confidence": 0.5609690276065933,
|
|
"calibration/prompt_uniqueness": 0.8721019669452394,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 706.4,
|
|
"completions/max_terminated_length": 494.0,
|
|
"completions/mean_length": 200.08984375,
|
|
"completions/mean_terminated_length": 199.95912170410156,
|
|
"completions/min_length": 90.4,
|
|
"completions/min_terminated_length": 90.4,
|
|
"epoch": 0.448,
|
|
"grad_norm": 0.0010598563821986318,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 471111009.0,
|
|
"reward": 0.9310909390449524,
|
|
"reward_std": 0.08768343180418015,
|
|
"rewards/accuracy_reward": 0.51494140625,
|
|
"rewards/brier_reward": 0.7858774423599243,
|
|
"rewards/confidence_uniqueness_reward": 0.9541089057922363,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.003412813926115632,
|
|
"rewards/frontier_coverage_0": 0.11701681315898896,
|
|
"rewards/frontier_coverage_1": 0.11701681315898896,
|
|
"rewards/frontier_coverage_10": 0.11701681315898896,
|
|
"rewards/frontier_coverage_15": 0.11701681315898896,
|
|
"rewards/frontier_coverage_20": 0.11701681315898896,
|
|
"rewards/frontier_coverage_25": 0.11449546068906784,
|
|
"rewards/frontier_coverage_5": 0.11701681315898896,
|
|
"rewards/frontier_ece_reward": 0.007894697599112988,
|
|
"rewards/frontier_entropy_batch_reward": -0.11235017627477646,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.107989501953125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.13963166922330855,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0539947509765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0539947509765625,
|
|
"signal/advantage_abs_mean": 0.0684954434633255,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0684954434633255,
|
|
"signal/advantage_pre_scale_std": 0.11005422621965408,
|
|
"signal/advantage_std": 0.11005422621965408,
|
|
"signal/brier_reward/centered_abs_mean": 0.1446717381477356,
|
|
"signal/brier_reward/group_bin_occupancy": 0.853515625,
|
|
"signal/brier_reward/group_std_mean": 0.18473469018936156,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014467174373567104,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014467174373567104,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012451625987887382,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9578125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016036957129836084,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001245162612758577,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001245162612758577,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0035400362219661476,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.688671875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.006115310266613961,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.425045408424921e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.425045408424921e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18895358443260193,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.88828125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2398217111825943,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023619199171662332,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023619199171662332,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18895358443260193,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.88828125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2398217111825943,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023619199171662332,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023619199171662332,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18895358443260193,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.88828125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2398217111825943,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023619199171662332,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023619199171662332,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18895358443260193,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.88828125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2398217111825943,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023619199171662332,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023619199171662332,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18895358443260193,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88828125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2398217111825943,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023619199171662332,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023619199171662332,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1779042273759842,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.884375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2262539952993393,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022238029167056085,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022238029167056085,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18895358443260193,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.88828125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2398217111825943,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023619199171662332,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023619199171662332,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.012882906198501586,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.704296875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.016543524339795113,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001288290647789836,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001288290647789836,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17680651247501372,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7890625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.23934744596481322,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01768065169453621,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01768065169453621,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4091879941407811,
|
|
"calibration/batch_distribution_entropy": 0.9838473609296932,
|
|
"calibration/batch_entropy_100bins": 0.9688931964996506,
|
|
"calibration/batch_entropy_10bins": 0.9838473609296932,
|
|
"calibration/batch_entropy_50bins": 0.978368933561738,
|
|
"calibration/batch_uniqueness": 0.954656406723646,
|
|
"calibration/buffer_distribution_entropy": 0.9980712456007629,
|
|
"calibration/buffer_entropy_100bins": 0.9934814520723938,
|
|
"calibration/buffer_entropy_10bins": 0.9980712456007629,
|
|
"calibration/buffer_entropy_50bins": 0.9970431565305529,
|
|
"calibration/confidence_entropy": 0.5096224146267194,
|
|
"calibration/coverage@0%": 0.009782472492421626,
|
|
"calibration/coverage@1%": 0.009782472492421626,
|
|
"calibration/coverage@10%": 0.010955879355166724,
|
|
"calibration/coverage@15%": 0.010955879355166724,
|
|
"calibration/coverage@20%": 0.06653339161006869,
|
|
"calibration/coverage@25%": 0.12958179547503934,
|
|
"calibration/coverage@30%": 0.2461504229260197,
|
|
"calibration/coverage@5%": 0.009782472492421626,
|
|
"calibration/ece": 0.1322591503385764,
|
|
"calibration/mean_confidence": 0.4909400675946268,
|
|
"calibration/prompt_uniqueness": 0.8706819881633715,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1454.8,
|
|
"completions/max_terminated_length": 715.6,
|
|
"completions/mean_length": 202.88876953125,
|
|
"completions/mean_terminated_length": 202.2366149902344,
|
|
"completions/min_length": 90.8,
|
|
"completions/min_terminated_length": 90.8,
|
|
"epoch": 0.464,
|
|
"grad_norm": 0.0008505330188199878,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 488359406.0,
|
|
"reward": 0.9026776432991028,
|
|
"reward_std": 0.0831810936331749,
|
|
"rewards/accuracy_reward": 0.46337890625,
|
|
"rewards/brier_reward": 0.7662590742111206,
|
|
"rewards/confidence_uniqueness_reward": 0.9531535387039185,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.0037991167046129704,
|
|
"rewards/frontier_coverage_0": 0.13223221004009247,
|
|
"rewards/frontier_coverage_1": 0.13223221004009247,
|
|
"rewards/frontier_coverage_10": 0.13223221004009247,
|
|
"rewards/frontier_coverage_15": 0.13223221004009247,
|
|
"rewards/frontier_coverage_20": 0.13223221004009247,
|
|
"rewards/frontier_coverage_25": 0.12470296993851662,
|
|
"rewards/frontier_coverage_5": 0.13223221004009247,
|
|
"rewards/frontier_ece_reward": 0.0060929754748940464,
|
|
"rewards/frontier_entropy_batch_reward": -0.12698111385107042,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.086773681640625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11853125244379044,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0433868408203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0433868408203125,
|
|
"signal/advantage_abs_mean": 0.06301689743995667,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06301689743995667,
|
|
"signal/advantage_pre_scale_std": 0.10428053140640259,
|
|
"signal/advantage_std": 0.10428053140640259,
|
|
"signal/brier_reward/centered_abs_mean": 0.14725424647331237,
|
|
"signal/brier_reward/group_bin_occupancy": 0.86015625,
|
|
"signal/brier_reward/group_std_mean": 0.18804004192352294,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014725425094366074,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014725425094366074,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013746090233325958,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.936328125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01900733485817909,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013746090233325958,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013746090233325958,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_bin_occupancy": 0.12734375,
|
|
"signal/format_reward/group_std_mean": 0.0033145629800856113,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0034574420657008885,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.702734375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005849315505474806,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.3218026257818563e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.3218026257818563e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18060636520385742,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8921875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2308868497610092,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022575796116143464,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022575796116143464,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18060636520385742,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8921875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2308868497610092,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022575796116143464,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022575796116143464,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18060636520385742,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8921875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2308868497610092,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022575796116143464,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022575796116143464,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18060636520385742,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8921875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2308868497610092,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022575796116143464,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022575796116143464,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18060636520385742,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8921875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2308868497610092,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022575796116143464,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022575796116143464,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16891390979290008,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.887890625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.21626182198524474,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021114239003509282,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021114239003509282,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18060636520385742,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8921875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2308868497610092,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022575796116143464,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022575796116143464,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.011543591693043708,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.698828125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.015037482790648937,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011543591972440481,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011543591972440481,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18974616825580598,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75390625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.25198211073875426,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01897461786866188,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01897461786866188,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29942965486469897,
|
|
"calibration/batch_distribution_entropy": 0.9821019550872274,
|
|
"calibration/batch_entropy_100bins": 0.9658469089767218,
|
|
"calibration/batch_entropy_10bins": 0.9821019550872274,
|
|
"calibration/batch_entropy_50bins": 0.9783809273107217,
|
|
"calibration/batch_uniqueness": 0.9548924237184917,
|
|
"calibration/buffer_distribution_entropy": 0.9978537146074318,
|
|
"calibration/buffer_entropy_100bins": 0.9935547771121023,
|
|
"calibration/buffer_entropy_10bins": 0.9978537146074318,
|
|
"calibration/buffer_entropy_50bins": 0.9969228501511747,
|
|
"calibration/confidence_entropy": 0.49102090478939714,
|
|
"calibration/coverage@0%": 0.018359375,
|
|
"calibration/coverage@1%": 0.018359375,
|
|
"calibration/coverage@10%": 0.045703125,
|
|
"calibration/coverage@15%": 0.078515625,
|
|
"calibration/coverage@20%": 0.3109375,
|
|
"calibration/coverage@25%": 0.383984375,
|
|
"calibration/coverage@30%": 0.506640625,
|
|
"calibration/coverage@5%": 0.019921875,
|
|
"calibration/ece": 0.13898620416440183,
|
|
"calibration/mean_confidence": 0.500086290529229,
|
|
"calibration/prompt_uniqueness": 0.8661629731399584,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 1326.0,
|
|
"completions/max_terminated_length": 534.4,
|
|
"completions/mean_length": 200.81513671875,
|
|
"completions/mean_terminated_length": 199.7722595214844,
|
|
"completions/min_length": 92.4,
|
|
"completions/min_terminated_length": 92.4,
|
|
"epoch": 0.48,
|
|
"grad_norm": 0.00103501055855304,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0021,
|
|
"num_tokens": 505463785.0,
|
|
"reward": 0.9328296899795532,
|
|
"reward_std": 0.08904698044061661,
|
|
"rewards/accuracy_reward": 0.5251953125,
|
|
"rewards/brier_reward": 0.7724361300468445,
|
|
"rewards/confidence_uniqueness_reward": 0.9536016345024109,
|
|
"rewards/format_reward": 0.99912109375,
|
|
"rewards/frontier_aurc_reward": -0.002996008098125458,
|
|
"rewards/frontier_coverage_0": 0.10110445320606232,
|
|
"rewards/frontier_coverage_1": 0.10110445320606232,
|
|
"rewards/frontier_coverage_10": 0.10110445320606232,
|
|
"rewards/frontier_coverage_15": 0.10110445320606232,
|
|
"rewards/frontier_coverage_20": 0.10104168802499772,
|
|
"rewards/frontier_coverage_25": 0.09731045961380005,
|
|
"rewards/frontier_coverage_5": 0.10110445320606232,
|
|
"rewards/frontier_ece_reward": 0.006894841138273477,
|
|
"rewards/frontier_entropy_batch_reward": -0.11382773965597152,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1135009765625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.176953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1488563358783722,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.584375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05675048828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05675048828125,
|
|
"signal/advantage_abs_mean": 0.06805618703365326,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06805618703365326,
|
|
"signal/advantage_pre_scale_std": 0.11172468066215516,
|
|
"signal/advantage_std": 0.11172468066215516,
|
|
"signal/brier_reward/centered_abs_mean": 0.14411163926124573,
|
|
"signal/brier_reward/group_bin_occupancy": 0.846484375,
|
|
"signal/brier_reward/group_std_mean": 0.1837744355201721,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0144111642614007,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0144111642614007,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01357480175793171,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.92734375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01972918212413788,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013574802316725254,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013574802316725254,
|
|
"signal/format_reward/centered_abs_mean": 0.001702880859375,
|
|
"signal/format_reward/group_bin_occupancy": 0.128515625,
|
|
"signal/format_reward/group_std_mean": 0.004971844470128417,
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002930343523621559,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.708984375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00489910626783967,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6629295937018466e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6629295937018466e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19496614634990692,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24985696673393248,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024370769038796424,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024370769038796424,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19496614634990692,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24985696673393248,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024370769038796424,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024370769038796424,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19496614634990692,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24985696673393248,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024370769038796424,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024370769038796424,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19496614634990692,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24985696673393248,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024370769038796424,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024370769038796424,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1944173663854599,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24920360147953033,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024302172008901835,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024302172008901835,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17979943454265596,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.86640625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23114030063152313,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022474929224699735,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022474929224699735,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19496614634990692,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24985696673393248,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024370769038796424,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024370769038796424,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.011257660388946534,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.68515625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.014584441110491753,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011257660342380404,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011257660342380404,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17291922271251678,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.753125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.23585031032562256,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017291922867298127,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017291922867298127,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"eval_calibration/aurc": 0.47709188794813834,
|
|
"eval_calibration/batch_distribution_entropy": 0.9182471569237116,
|
|
"eval_calibration/batch_entropy_100bins": 0.6967083197710886,
|
|
"eval_calibration/batch_entropy_10bins": 0.9182471569237116,
|
|
"eval_calibration/batch_entropy_50bins": 0.78747214973767,
|
|
"eval_calibration/batch_uniqueness": 0.8984375,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9978553291331319,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9937933422243487,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9978553291331319,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9970143809817915,
|
|
"eval_calibration/confidence_entropy": 0.48359738329176344,
|
|
"eval_calibration/coverage@0%": 0.03125,
|
|
"eval_calibration/coverage@1%": 0.03125,
|
|
"eval_calibration/coverage@10%": 0.03125,
|
|
"eval_calibration/coverage@15%": 0.0625,
|
|
"eval_calibration/coverage@20%": 0.0859375,
|
|
"eval_calibration/coverage@25%": 0.1875,
|
|
"eval_calibration/coverage@30%": 0.28125,
|
|
"eval_calibration/coverage@5%": 0.03125,
|
|
"eval_calibration/ece": 0.19095125563443616,
|
|
"eval_calibration/mean_confidence": 0.472773278943161,
|
|
"eval_calibration/prompt_uniqueness": 0.8984375,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 369.75,
|
|
"eval_completions/max_terminated_length": 369.75,
|
|
"eval_completions/mean_length": 199.24003219604492,
|
|
"eval_completions/mean_terminated_length": 199.24003219604492,
|
|
"eval_completions/min_length": 107.5,
|
|
"eval_completions/min_terminated_length": 107.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 505463785.0,
|
|
"eval_reward": 0.834217444062233,
|
|
"eval_reward_std": 0.22967081889510155,
|
|
"eval_rewards/accuracy_reward": 0.427734375,
|
|
"eval_rewards/brier_reward": 0.7905289083719254,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.899658203125,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.004185517493169755,
|
|
"eval_rewards/frontier_coverage_0": 0.17981769144535065,
|
|
"eval_rewards/frontier_coverage_1": 0.17981769144535065,
|
|
"eval_rewards/frontier_coverage_10": 0.17981769144535065,
|
|
"eval_rewards/frontier_coverage_15": 0.17981769144535065,
|
|
"eval_rewards/frontier_coverage_20": 0.17774366959929466,
|
|
"eval_rewards/frontier_coverage_25": 0.15828291699290276,
|
|
"eval_rewards/frontier_coverage_5": 0.17981769144535065,
|
|
"eval_rewards/frontier_ece_reward": 0.0069712207186967134,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.64752197265625,
|
|
"eval_runtime": 19.8063,
|
|
"eval_samples_per_second": 25.245,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4720458984375,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49310608208179474,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23602294921875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23602294921875,
|
|
"eval_signal/advantage_abs_mean": 0.2121136114001274,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.2121136114001274,
|
|
"eval_signal/advantage_pre_scale_std": 0.22733867913484573,
|
|
"eval_signal/advantage_std": 0.22733867913484573,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.19264057651162148,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.875,
|
|
"eval_signal/brier_reward/group_std_mean": 0.24780448526144028,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019264057744294405,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019264057744294405,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.03955078125,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.046518636867403984,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003955078369472176,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003955078369472176,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005617183982394636,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6171875,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.011223837500438094,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.021479905233718e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.021479905233718e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.34257589280605316,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.4243193119764328,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004282198729924858,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004282198729924858,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.34257589280605316,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4243193119764328,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004282198729924858,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004282198729924858,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.34257589280605316,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4243193119764328,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004282198729924858,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004282198729924858,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.34257589280605316,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4243193119764328,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004282198729924858,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004282198729924858,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.33809472620487213,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.41909338533878326,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0042261844500899315,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0042261844500899315,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.30766793340444565,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9453125,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.3833855912089348,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003845849307253957,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003845849307253957,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.34257589280605316,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4243193119764328,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004282198729924858,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004282198729924858,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.013124892022460699,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8359375,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.01785027328878641,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013124891556799412,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013124891556799412,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3224372863769531,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.33711399137973785,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03224372863769531,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03224372863769531,
|
|
"eval_steps_per_second": 0.202,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.39113271435480473,
|
|
"calibration/batch_distribution_entropy": 0.9871660215184697,
|
|
"calibration/batch_entropy_100bins": 0.9700699867201148,
|
|
"calibration/batch_entropy_10bins": 0.9871660215184697,
|
|
"calibration/batch_entropy_50bins": 0.9807550994581791,
|
|
"calibration/batch_uniqueness": 0.9560546875,
|
|
"calibration/buffer_distribution_entropy": 0.9978919984582358,
|
|
"calibration/buffer_entropy_100bins": 0.9939673543065268,
|
|
"calibration/buffer_entropy_10bins": 0.9978919984582358,
|
|
"calibration/buffer_entropy_50bins": 0.9971106974993134,
|
|
"calibration/confidence_entropy": 0.4840377475415253,
|
|
"calibration/coverage@0%": 0.001171875,
|
|
"calibration/coverage@1%": 0.001171875,
|
|
"calibration/coverage@10%": 0.082421875,
|
|
"calibration/coverage@15%": 0.145703125,
|
|
"calibration/coverage@20%": 0.175390625,
|
|
"calibration/coverage@25%": 0.234765625,
|
|
"calibration/coverage@30%": 0.273046875,
|
|
"calibration/coverage@5%": 0.001171875,
|
|
"calibration/ece": 0.13277360800522978,
|
|
"calibration/mean_confidence": 0.5166870016179566,
|
|
"calibration/prompt_uniqueness": 0.863623046875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 548.8,
|
|
"completions/max_terminated_length": 548.8,
|
|
"completions/mean_length": 199.92080078125,
|
|
"completions/mean_terminated_length": 199.92080078125,
|
|
"completions/min_length": 94.6,
|
|
"completions/min_terminated_length": 94.6,
|
|
"epoch": 0.496,
|
|
"grad_norm": 0.0008479790994897485,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 522818814.0,
|
|
"reward": 0.9452289819717408,
|
|
"reward_std": 0.08597002923488617,
|
|
"rewards/accuracy_reward": 0.5462890625,
|
|
"rewards/brier_reward": 0.779764711856842,
|
|
"rewards/confidence_uniqueness_reward": 0.9554817199707031,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.003104729810729623,
|
|
"rewards/frontier_coverage_0": 0.08789373338222503,
|
|
"rewards/frontier_coverage_1": 0.08789373338222503,
|
|
"rewards/frontier_coverage_10": 0.08789373338222503,
|
|
"rewards/frontier_coverage_15": 0.08793908208608628,
|
|
"rewards/frontier_coverage_20": 0.0870589330792427,
|
|
"rewards/frontier_coverage_25": 0.07420700564980506,
|
|
"rewards/frontier_coverage_5": 0.08789373338222503,
|
|
"rewards/frontier_ece_reward": 0.006637497898191213,
|
|
"rewards/frontier_entropy_batch_reward": -0.09574897587299347,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1052490234375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1405972883105278,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.59375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05262451171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05262451171875,
|
|
"signal/advantage_abs_mean": 0.06608989387750626,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06608989387750626,
|
|
"signal/advantage_pre_scale_std": 0.10801601260900498,
|
|
"signal/advantage_std": 0.10801601260900498,
|
|
"signal/brier_reward/centered_abs_mean": 0.13847643435001372,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8671875,
|
|
"signal/brier_reward/group_std_mean": 0.17648339569568633,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013847643136978149,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013847643136978149,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011437726020812989,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9515625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.014382154121994973,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011437725741416215,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011437725741416215,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003300163522362709,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7109375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005784228537231683,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.125204504816793e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.125204504816793e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17669629752635957,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.885546875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2273976117372513,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022087037097662686,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022087037097662686,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17669629752635957,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.885546875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2273976117372513,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022087037097662686,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022087037097662686,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17669629752635957,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.885546875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2273976117372513,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022087037097662686,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022087037097662686,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1764056235551834,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.885546875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22704406976699829,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022050703410059214,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022050703410059214,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17391692698001862,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.882421875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22390609383583068,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021739616990089417,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021739616990089417,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.151907816529274,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.87578125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.19601451158523558,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018988477066159248,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018988477066159248,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17669629752635957,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.885546875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2273976117372513,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022087037097662686,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022087037097662686,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010442128777503968,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.69296875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.013436480797827244,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010442128870636225,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010442128870636225,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.15976455807685852,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76640625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.21749602854251862,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.015976456366479395,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015976456366479395,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3198817575673089,
|
|
"calibration/batch_distribution_entropy": 0.9839418406007525,
|
|
"calibration/batch_entropy_100bins": 0.9679700342611737,
|
|
"calibration/batch_entropy_10bins": 0.9839418406007525,
|
|
"calibration/batch_entropy_50bins": 0.9785860503623137,
|
|
"calibration/batch_uniqueness": 0.954736328125,
|
|
"calibration/buffer_distribution_entropy": 0.9980732134663842,
|
|
"calibration/buffer_entropy_100bins": 0.9944633582043301,
|
|
"calibration/buffer_entropy_10bins": 0.9980732134663842,
|
|
"calibration/buffer_entropy_50bins": 0.9973456517162692,
|
|
"calibration/confidence_entropy": 0.5027024811359091,
|
|
"calibration/coverage@0%": 0.00546875,
|
|
"calibration/coverage@1%": 0.00546875,
|
|
"calibration/coverage@10%": 0.10859375,
|
|
"calibration/coverage@15%": 0.22421875,
|
|
"calibration/coverage@20%": 0.310546875,
|
|
"calibration/coverage@25%": 0.423046875,
|
|
"calibration/coverage@30%": 0.5171875,
|
|
"calibration/coverage@5%": 0.0515625,
|
|
"calibration/ece": 0.13456900502830998,
|
|
"calibration/mean_confidence": 0.5221917640126743,
|
|
"calibration/prompt_uniqueness": 0.86943359375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 473.6,
|
|
"completions/max_terminated_length": 473.6,
|
|
"completions/mean_length": 193.7994140625,
|
|
"completions/mean_terminated_length": 193.7994140625,
|
|
"completions/min_length": 90.4,
|
|
"completions/min_terminated_length": 90.4,
|
|
"epoch": 0.512,
|
|
"grad_norm": 0.0009588066022843122,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 539948984.0,
|
|
"reward": 0.9407204031944275,
|
|
"reward_std": 0.08999814093112946,
|
|
"rewards/accuracy_reward": 0.536328125,
|
|
"rewards/brier_reward": 0.7952704071998596,
|
|
"rewards/confidence_uniqueness_reward": 0.9544906616210938,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0030694708693772554,
|
|
"rewards/frontier_coverage_0": 0.10995662808418274,
|
|
"rewards/frontier_coverage_1": 0.10995662808418274,
|
|
"rewards/frontier_coverage_10": 0.10985160320997238,
|
|
"rewards/frontier_coverage_15": 0.10972333252429962,
|
|
"rewards/frontier_coverage_20": 0.10820228606462479,
|
|
"rewards/frontier_coverage_25": 0.0877942256629467,
|
|
"rewards/frontier_coverage_5": 0.10995662808418274,
|
|
"rewards/frontier_ece_reward": 0.007269245106726885,
|
|
"rewards/frontier_entropy_batch_reward": -0.12426345646381379,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10556640625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.14583270251750946,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052783203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.052783203125,
|
|
"signal/advantage_abs_mean": 0.06839020401239396,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06839020401239396,
|
|
"signal/advantage_pre_scale_std": 0.11198956221342087,
|
|
"signal/advantage_std": 0.11198956221342087,
|
|
"signal/brier_reward/centered_abs_mean": 0.13231053799390793,
|
|
"signal/brier_reward/group_bin_occupancy": 0.848046875,
|
|
"signal/brier_reward/group_std_mean": 0.1709081143140793,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013231054134666919,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013231054134666919,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012249898910522462,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.951953125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015168083272874356,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012249899096786975,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012249899096786975,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003360653715208173,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.698046875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00556328808888793,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2008173477370295e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2008173477370295e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16207623481750488,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21392302811145783,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002025953074917197,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002025953074917197,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16207623481750488,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21392302811145783,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002025953074917197,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002025953074917197,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1619580239057541,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.879296875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2137796849012375,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002024475345388055,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002024475345388055,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1616723895072937,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.87890625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21342427134513856,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002020904840901494,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002020904840901494,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15862617492675782,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.877734375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20951247215270996,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00198282718192786,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00198282718192786,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12221903800964355,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.865234375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1629067152738571,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015277379658073186,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015277379658073186,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16207623481750488,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21392302811145783,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002025953074917197,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002025953074917197,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010067025758326053,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.678125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012916841916739941,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001006702589802444,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001006702589802444,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19091980755329133,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.772265625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2507141649723053,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019091981835663317,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019091981835663317,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21586168859585855,
|
|
"calibration/batch_distribution_entropy": 0.9889782873586641,
|
|
"calibration/batch_entropy_100bins": 0.9707927997288179,
|
|
"calibration/batch_entropy_10bins": 0.9889782873586641,
|
|
"calibration/batch_entropy_50bins": 0.9823227270977654,
|
|
"calibration/batch_uniqueness": 0.9561279296875,
|
|
"calibration/buffer_distribution_entropy": 0.9982977167926123,
|
|
"calibration/buffer_entropy_100bins": 0.9947302848566546,
|
|
"calibration/buffer_entropy_10bins": 0.9982977167926123,
|
|
"calibration/buffer_entropy_50bins": 0.9975194217333142,
|
|
"calibration/confidence_entropy": 0.49518329062893385,
|
|
"calibration/coverage@0%": 0.03046875,
|
|
"calibration/coverage@1%": 0.03046875,
|
|
"calibration/coverage@10%": 0.2296875,
|
|
"calibration/coverage@15%": 0.41875,
|
|
"calibration/coverage@20%": 0.558203125,
|
|
"calibration/coverage@25%": 0.6453125,
|
|
"calibration/coverage@30%": 0.741015625,
|
|
"calibration/coverage@5%": 0.08828125,
|
|
"calibration/ece": 0.10502446492353879,
|
|
"calibration/mean_confidence": 0.5066498364850488,
|
|
"calibration/prompt_uniqueness": 0.856396484375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 458.0,
|
|
"completions/max_terminated_length": 458.0,
|
|
"completions/mean_length": 195.665234375,
|
|
"completions/mean_terminated_length": 195.665234375,
|
|
"completions/min_length": 91.0,
|
|
"completions/min_terminated_length": 91.0,
|
|
"epoch": 0.528,
|
|
"grad_norm": 0.0008851737948134542,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 556982132.0,
|
|
"reward": 0.9444244861602783,
|
|
"reward_std": 0.08863194286823273,
|
|
"rewards/accuracy_reward": 0.53828125,
|
|
"rewards/brier_reward": 0.8043418645858764,
|
|
"rewards/confidence_uniqueness_reward": 0.9550811767578125,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0023640868021175265,
|
|
"rewards/frontier_coverage_0": 0.12184633612632752,
|
|
"rewards/frontier_coverage_1": 0.12184633612632752,
|
|
"rewards/frontier_coverage_10": 0.12164630815386772,
|
|
"rewards/frontier_coverage_15": 0.12158802151679993,
|
|
"rewards/frontier_coverage_20": 0.1201416477560997,
|
|
"rewards/frontier_coverage_25": 0.09376933947205543,
|
|
"rewards/frontier_coverage_5": 0.1217406578361988,
|
|
"rewards/frontier_ece_reward": 0.006898148078471422,
|
|
"rewards/frontier_entropy_batch_reward": -0.11600946933031082,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1177001953125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.176171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.15068991482257843,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05885009765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05885009765625,
|
|
"signal/advantage_abs_mean": 0.06982930153608322,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06982930153608322,
|
|
"signal/advantage_pre_scale_std": 0.11278409063816071,
|
|
"signal/advantage_std": 0.11278409063816071,
|
|
"signal/brier_reward/centered_abs_mean": 0.12580630481243132,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84140625,
|
|
"signal/brier_reward/group_std_mean": 0.16182839572429658,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012580630742013455,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012580630742013455,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012181806564331054,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.951171875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015199671126902103,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012181806610897183,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012181806610897183,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002553233178332448,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.69921875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0044392747804522514,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1915416184347126e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1915416184347126e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17886043787002565,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.880078125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23067601323127745,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022357555106282236,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022357555106282236,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17886043787002565,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.880078125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23067601323127745,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022357555106282236,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022357555106282236,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1785387396812439,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23026902973651886,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022317342925816776,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022317342925816776,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17839260399341583,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23008474409580232,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002229907549917698,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002229907549917698,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1752532511949539,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.880859375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2261554479598999,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021906656213104723,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021906656213104723,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12653469890356064,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16514583826065063,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015816838014870882,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015816838014870882,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17872408032417297,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.880078125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2305031508207321,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002234051004052162,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002234051004052162,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008623083122074604,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.68125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011099833622574806,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008623083238489925,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008623083238489925,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1766481250524521,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75390625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.23794711530208587,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01766481213271618,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01766481213271618,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2572418976278044,
|
|
"calibration/batch_distribution_entropy": 0.9795796288000306,
|
|
"calibration/batch_entropy_100bins": 0.9608326685871675,
|
|
"calibration/batch_entropy_10bins": 0.9795796288000306,
|
|
"calibration/batch_entropy_50bins": 0.9744636836243364,
|
|
"calibration/batch_uniqueness": 0.957818603515625,
|
|
"calibration/buffer_distribution_entropy": 0.9980996847351415,
|
|
"calibration/buffer_entropy_100bins": 0.9946257091132935,
|
|
"calibration/buffer_entropy_10bins": 0.9980996847351415,
|
|
"calibration/buffer_entropy_50bins": 0.9973523035428729,
|
|
"calibration/confidence_entropy": 0.4783709663886726,
|
|
"calibration/coverage@0%": 0.008984375,
|
|
"calibration/coverage@1%": 0.008984375,
|
|
"calibration/coverage@10%": 0.065625,
|
|
"calibration/coverage@15%": 0.1890625,
|
|
"calibration/coverage@20%": 0.355859375,
|
|
"calibration/coverage@25%": 0.54609375,
|
|
"calibration/coverage@30%": 0.707421875,
|
|
"calibration/coverage@5%": 0.01171875,
|
|
"calibration/ece": 0.08335968835444894,
|
|
"calibration/mean_confidence": 0.5398684844209177,
|
|
"calibration/prompt_uniqueness": 0.870166015625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 519.6,
|
|
"completions/max_terminated_length": 519.6,
|
|
"completions/mean_length": 196.367578125,
|
|
"completions/mean_terminated_length": 196.367578125,
|
|
"completions/min_length": 90.6,
|
|
"completions/min_terminated_length": 90.6,
|
|
"epoch": 0.544,
|
|
"grad_norm": 0.000998239149339497,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 574156520.0,
|
|
"reward": 0.9517637491226196,
|
|
"reward_std": 0.0945101723074913,
|
|
"rewards/accuracy_reward": 0.566796875,
|
|
"rewards/brier_reward": 0.7817894101142884,
|
|
"rewards/confidence_uniqueness_reward": 0.9573951721191406,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.00291836392134428,
|
|
"rewards/frontier_coverage_0": 0.0726560816168785,
|
|
"rewards/frontier_coverage_1": 0.0726560816168785,
|
|
"rewards/frontier_coverage_10": 0.07252307832241059,
|
|
"rewards/frontier_coverage_15": 0.0724917471408844,
|
|
"rewards/frontier_coverage_20": 0.07174690216779708,
|
|
"rewards/frontier_coverage_25": 0.054031723737716676,
|
|
"rewards/frontier_coverage_5": 0.0725811704993248,
|
|
"rewards/frontier_ece_reward": 0.005196140892803669,
|
|
"rewards/frontier_entropy_batch_reward": -0.12144885808229447,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1263427734375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.181640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.16339680254459382,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.546875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06317138671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06317138671875,
|
|
"signal/advantage_abs_mean": 0.07346977144479752,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07346977144479752,
|
|
"signal/advantage_pre_scale_std": 0.11571350246667862,
|
|
"signal/advantage_std": 0.11571350246667862,
|
|
"signal/brier_reward/centered_abs_mean": 0.14408716559410095,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8609375,
|
|
"signal/brier_reward/group_std_mean": 0.18343546390533447,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01440871749073267,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01440871749073267,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01195671558380127,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.95390625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01492820680141449,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00119567159563303,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00119567159563303,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003072212403640151,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7078125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005290277022868395,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.8402655627578496e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.8402655627578496e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19166307151317596,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24394794702529907,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023957884404808283,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023957884404808283,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19166307151317596,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24394794702529907,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023957884404808283,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023957884404808283,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19123572409152984,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2434035360813141,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023904466070234776,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023904466070234776,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1911354660987854,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24327577650547028,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023891933728009463,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023891933728009463,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18775070905685426,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23907591700553893,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023468838538974524,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023468838538974524,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12309406846761703,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1585517108440399,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015386758605018258,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015386758605018258,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19142203629016877,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.871484375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24364092350006103,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002392775472253561,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002392775472253561,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009025206603109837,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.69453125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011522319540381432,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009025206556543708,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009025206556543708,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18435474932193757,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.799609375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24489886164665223,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018435475416481495,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018435475416481495,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2701489115748014,
|
|
"calibration/batch_distribution_entropy": 0.9858005961207958,
|
|
"calibration/batch_entropy_100bins": 0.9584672863006161,
|
|
"calibration/batch_entropy_10bins": 0.9858005961207958,
|
|
"calibration/batch_entropy_50bins": 0.9780489974776383,
|
|
"calibration/batch_uniqueness": 0.9608780146637634,
|
|
"calibration/buffer_distribution_entropy": 0.9978735851080804,
|
|
"calibration/buffer_entropy_100bins": 0.9944173356403502,
|
|
"calibration/buffer_entropy_10bins": 0.9978735851080804,
|
|
"calibration/buffer_entropy_50bins": 0.99715421757982,
|
|
"calibration/confidence_entropy": 0.5013730363986142,
|
|
"calibration/coverage@0%": 0.04025501467710372,
|
|
"calibration/coverage@1%": 0.04025501467710372,
|
|
"calibration/coverage@10%": 0.2434396404109589,
|
|
"calibration/coverage@15%": 0.28407687133072407,
|
|
"calibration/coverage@20%": 0.32043480919765166,
|
|
"calibration/coverage@25%": 0.44272871819960863,
|
|
"calibration/coverage@30%": 0.5927577666340509,
|
|
"calibration/coverage@5%": 0.13478932240704503,
|
|
"calibration/ece": 0.11489932579269604,
|
|
"calibration/mean_confidence": 0.493834637039139,
|
|
"calibration/prompt_uniqueness": 0.8709420830352498,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1088.2,
|
|
"completions/max_terminated_length": 488.6,
|
|
"completions/mean_length": 196.847265625,
|
|
"completions/mean_terminated_length": 196.45567321777344,
|
|
"completions/min_length": 88.6,
|
|
"completions/min_terminated_length": 88.6,
|
|
"epoch": 0.56,
|
|
"grad_norm": 0.0010303459130227566,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 590993644.0,
|
|
"reward": 0.9366175770759583,
|
|
"reward_std": 0.08155035525560379,
|
|
"rewards/accuracy_reward": 0.52333984375,
|
|
"rewards/brier_reward": 0.7957682371139526,
|
|
"rewards/confidence_uniqueness_reward": 0.9611407041549682,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0028076643124222756,
|
|
"rewards/frontier_coverage_0": 0.1162565752863884,
|
|
"rewards/frontier_coverage_1": 0.1162565752863884,
|
|
"rewards/frontier_coverage_10": 0.11595501154661178,
|
|
"rewards/frontier_coverage_15": 0.11588414609432221,
|
|
"rewards/frontier_coverage_20": 0.11414353549480438,
|
|
"rewards/frontier_coverage_25": 0.07903541177511215,
|
|
"rewards/frontier_coverage_5": 0.1160865843296051,
|
|
"rewards/frontier_ece_reward": 0.0057329384610056875,
|
|
"rewards/frontier_entropy_batch_reward": -0.10805188417434693,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.092449951171875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.173828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12797222435474395,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0462249755859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0462249755859375,
|
|
"signal/advantage_abs_mean": 0.06160459816455841,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06160459816455841,
|
|
"signal/advantage_pre_scale_std": 0.10464294999837875,
|
|
"signal/advantage_std": 0.10464294999837875,
|
|
"signal/brier_reward/centered_abs_mean": 0.1313459038734436,
|
|
"signal/brier_reward/group_bin_occupancy": 0.859375,
|
|
"signal/brier_reward/group_std_mean": 0.169065198302269,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013134590722620488,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013134590722620488,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012869596667587756,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9203125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01707104854285717,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012869596714153886,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012869596714153886,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027754565235227346,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71640625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004652646463364362,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.469320727162994e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.469320727162994e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16855145394802093,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.88828125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21740144789218901,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002106893085874617,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002106893085874617,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16855145394802093,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.88828125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21740144789218901,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002106893085874617,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002106893085874617,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16817551851272583,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.887890625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21691781878471375,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021021940745413305,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021021940745413305,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16808723509311677,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21680429875850676,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021010904340073465,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021010904340073465,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16493748724460602,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.884765625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2127989798784256,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002061718562617898,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002061718562617898,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10539229065179825,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.875390625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13706440329551697,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001317403675056994,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001317403675056994,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1683394968509674,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21712871193885802,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002104243659414351,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002104243659414351,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007921327650547028,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.670703125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010234402120113372,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007921327836811542,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007921327836811542,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1686824917793274,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.777734375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.22734990119934081,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.016868249885737895,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.016868249885737895,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30699371259582486,
|
|
"calibration/batch_distribution_entropy": 0.9854770880284256,
|
|
"calibration/batch_entropy_100bins": 0.9541744663609115,
|
|
"calibration/batch_entropy_10bins": 0.9854770880284256,
|
|
"calibration/batch_entropy_50bins": 0.9757598769843548,
|
|
"calibration/batch_uniqueness": 0.963922119140625,
|
|
"calibration/buffer_distribution_entropy": 0.9981526346641395,
|
|
"calibration/buffer_entropy_100bins": 0.9944422625280505,
|
|
"calibration/buffer_entropy_10bins": 0.9981526346641395,
|
|
"calibration/buffer_entropy_50bins": 0.9972570207198382,
|
|
"calibration/confidence_entropy": 0.496733407619662,
|
|
"calibration/coverage@0%": 0.0484375,
|
|
"calibration/coverage@1%": 0.0484375,
|
|
"calibration/coverage@10%": 0.131640625,
|
|
"calibration/coverage@15%": 0.21015625,
|
|
"calibration/coverage@20%": 0.263671875,
|
|
"calibration/coverage@25%": 0.35703125,
|
|
"calibration/coverage@30%": 0.519921875,
|
|
"calibration/coverage@5%": 0.062109375,
|
|
"calibration/ece": 0.10147968634650846,
|
|
"calibration/mean_confidence": 0.4753874622329694,
|
|
"calibration/prompt_uniqueness": 0.86865234375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 514.6,
|
|
"completions/max_terminated_length": 514.6,
|
|
"completions/mean_length": 198.885546875,
|
|
"completions/mean_terminated_length": 198.885546875,
|
|
"completions/min_length": 98.2,
|
|
"completions/min_terminated_length": 98.2,
|
|
"epoch": 0.576,
|
|
"grad_norm": 0.0010701222345232964,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 608216856.0,
|
|
"reward": 0.9302929162979126,
|
|
"reward_std": 0.07859764248132706,
|
|
"rewards/accuracy_reward": 0.51591796875,
|
|
"rewards/brier_reward": 0.7868143916130066,
|
|
"rewards/confidence_uniqueness_reward": 0.9633003234863281,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002910622302442789,
|
|
"rewards/frontier_coverage_0": 0.1169891744852066,
|
|
"rewards/frontier_coverage_1": 0.1169891744852066,
|
|
"rewards/frontier_coverage_10": 0.11675787419080734,
|
|
"rewards/frontier_coverage_15": 0.1165872111916542,
|
|
"rewards/frontier_coverage_20": 0.11500040218234062,
|
|
"rewards/frontier_coverage_25": 0.07931032329797745,
|
|
"rewards/frontier_coverage_5": 0.1169125959277153,
|
|
"rewards/frontier_ece_reward": 0.004967722669243812,
|
|
"rewards/frontier_entropy_batch_reward": -0.12869784384965896,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.089678955078125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12337614744901657,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0448394775390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0448394775390625,
|
|
"signal/advantage_abs_mean": 0.059406136721372606,
|
|
"signal/advantage_pre_scale_abs_mean": 0.059406136721372606,
|
|
"signal/advantage_pre_scale_std": 0.09829565286636352,
|
|
"signal/advantage_std": 0.09829565286636352,
|
|
"signal/brier_reward/centered_abs_mean": 0.13098579347133638,
|
|
"signal/brier_reward/group_bin_occupancy": 0.851171875,
|
|
"signal/brier_reward/group_std_mean": 0.1678290545940399,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01309858001768589,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01309858001768589,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013459730148315429,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.890625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017082039453089237,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013459730194881558,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013459730194881558,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026516387006267905,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.708984375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004528477415442466,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.314548303023912e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.314548303023912e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17838220298290253,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8734375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22681029438972472,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002229777490720153,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002229777490720153,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17838220298290253,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8734375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22681029438972472,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002229777490720153,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002229777490720153,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17802486717700958,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.873828125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22636044323444365,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002225310867652297,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002225310867652297,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17769393920898438,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.873828125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2259401261806488,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022211743518710135,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022211743518710135,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17457264065742492,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.871484375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22201407551765442,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021821580128744246,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021821580128744246,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11014373302459717,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.876171875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14102184176445007,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013767967000603675,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013767967000603675,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1782844364643097,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8734375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22668357491493224,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00222855550237,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00222855550237,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00747135728597641,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009620749577879905,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007471357355825603,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007471357355825603,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1880700945854187,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.773046875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.25184816122055054,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01880700998008251,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01880700998008251,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3112719069113949,
|
|
"calibration/batch_distribution_entropy": 0.9798265397899921,
|
|
"calibration/batch_entropy_100bins": 0.946650027126187,
|
|
"calibration/batch_entropy_10bins": 0.9798265397899921,
|
|
"calibration/batch_entropy_50bins": 0.9711012111621464,
|
|
"calibration/batch_uniqueness": 0.960726591011013,
|
|
"calibration/buffer_distribution_entropy": 0.9982804261548217,
|
|
"calibration/buffer_entropy_100bins": 0.9943895114204372,
|
|
"calibration/buffer_entropy_10bins": 0.9982804261548217,
|
|
"calibration/buffer_entropy_50bins": 0.9972331678385433,
|
|
"calibration/confidence_entropy": 0.48484172667766945,
|
|
"calibration/coverage@0%": 0.012899033757338552,
|
|
"calibration/coverage@1%": 0.012899033757338552,
|
|
"calibration/coverage@10%": 0.20021633439334638,
|
|
"calibration/coverage@15%": 0.29246346012720154,
|
|
"calibration/coverage@20%": 0.3956931873776908,
|
|
"calibration/coverage@25%": 0.49148880870841494,
|
|
"calibration/coverage@30%": 0.5556078767123288,
|
|
"calibration/coverage@5%": 0.046944563356164384,
|
|
"calibration/ece": 0.14819788302443063,
|
|
"calibration/mean_confidence": 0.4693338775372896,
|
|
"calibration/prompt_uniqueness": 0.8631444194686525,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 950.8,
|
|
"completions/max_terminated_length": 725.0,
|
|
"completions/mean_length": 201.2125,
|
|
"completions/mean_terminated_length": 200.95231018066406,
|
|
"completions/min_length": 93.2,
|
|
"completions/min_terminated_length": 93.2,
|
|
"epoch": 0.592,
|
|
"grad_norm": 0.0008901117253117263,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 625444984.0,
|
|
"reward": 0.9317437887191773,
|
|
"reward_std": 0.0811847597360611,
|
|
"rewards/accuracy_reward": 0.5265625,
|
|
"rewards/brier_reward": 0.7796778678894043,
|
|
"rewards/confidence_uniqueness_reward": 0.9608142495155334,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002869694633409381,
|
|
"rewards/frontier_coverage_0": 0.1071198582649231,
|
|
"rewards/frontier_coverage_1": 0.1071198582649231,
|
|
"rewards/frontier_coverage_10": 0.10704143643379212,
|
|
"rewards/frontier_coverage_15": 0.10693599283695221,
|
|
"rewards/frontier_coverage_20": 0.10569211542606353,
|
|
"rewards/frontier_coverage_25": 0.06944319903850556,
|
|
"rewards/frontier_coverage_5": 0.1071198582649231,
|
|
"rewards/frontier_ece_reward": 0.004962181858718395,
|
|
"rewards/frontier_entropy_batch_reward": -0.14830300509929656,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.099169921875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12951190322637557,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0495849609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0495849609375,
|
|
"signal/advantage_abs_mean": 0.06339782625436782,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06339782625436782,
|
|
"signal/advantage_pre_scale_std": 0.10178755819797516,
|
|
"signal/advantage_std": 0.10178755819797516,
|
|
"signal/brier_reward/centered_abs_mean": 0.13208021223545074,
|
|
"signal/brier_reward/group_bin_occupancy": 0.830859375,
|
|
"signal/brier_reward/group_std_mean": 0.16981416642665864,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013208021223545075,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013208021223545075,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.016946067102253437,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.83671875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.022317757830023766,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0016946068033576011,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016946068033576011,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027397330850362778,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.698828125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004624523315578699,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4246665018144994e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4246665018144994e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1888021171092987,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23875601589679718,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002360026491805911,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002360026491805911,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1888021171092987,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23875601589679718,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002360026491805911,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002360026491805911,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18852269053459167,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23838862776756287,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023565337061882017,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023565337061882017,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18781451284885406,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2375061869621277,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023476815316826106,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023476815316826106,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18328932523727418,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2319197654724121,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002291116584092379,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002291116584092379,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11159079521894455,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.878125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14274431467056276,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013948849868029357,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013948849868029357,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1888021171092987,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23875601589679718,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002360026491805911,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002360026491805911,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0074934825301170346,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.656640625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009578016772866248,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007493482902646065,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007493482902646065,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21184809803962706,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.731640625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2794174253940582,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021184809505939484,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021184809505939484,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23472786932091197,
|
|
"calibration/batch_distribution_entropy": 0.9717881079374342,
|
|
"calibration/batch_entropy_100bins": 0.946275985669382,
|
|
"calibration/batch_entropy_10bins": 0.9717881079374342,
|
|
"calibration/batch_entropy_50bins": 0.9700067463765413,
|
|
"calibration/batch_uniqueness": 0.9602325439453125,
|
|
"calibration/buffer_distribution_entropy": 0.9984455204444757,
|
|
"calibration/buffer_entropy_100bins": 0.9943282725910221,
|
|
"calibration/buffer_entropy_10bins": 0.9984455204444757,
|
|
"calibration/buffer_entropy_50bins": 0.9972307313275625,
|
|
"calibration/confidence_entropy": 0.4705205830282598,
|
|
"calibration/coverage@0%": 0.00625,
|
|
"calibration/coverage@1%": 0.00625,
|
|
"calibration/coverage@10%": 0.258984375,
|
|
"calibration/coverage@15%": 0.38125,
|
|
"calibration/coverage@20%": 0.5,
|
|
"calibration/coverage@25%": 0.613671875,
|
|
"calibration/coverage@30%": 0.703125,
|
|
"calibration/coverage@5%": 0.0734375,
|
|
"calibration/ece": 0.09547035939570313,
|
|
"calibration/mean_confidence": 0.4757562031042969,
|
|
"calibration/prompt_uniqueness": 0.859912109375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 668.0,
|
|
"completions/max_terminated_length": 464.2,
|
|
"completions/mean_length": 208.0927734375,
|
|
"completions/mean_terminated_length": 207.96302185058593,
|
|
"completions/min_length": 101.0,
|
|
"completions/min_terminated_length": 101.0,
|
|
"epoch": 0.608,
|
|
"grad_norm": 0.0007230865303426981,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 642575342.0,
|
|
"reward": 0.9399636507034301,
|
|
"reward_std": 0.07548893839120865,
|
|
"rewards/accuracy_reward": 0.5287109375,
|
|
"rewards/brier_reward": 0.8094612717628479,
|
|
"rewards/confidence_uniqueness_reward": 0.9632078409194946,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.00239487262442708,
|
|
"rewards/frontier_coverage_0": 0.13547399938106536,
|
|
"rewards/frontier_coverage_1": 0.13547399938106536,
|
|
"rewards/frontier_coverage_10": 0.13500653803348542,
|
|
"rewards/frontier_coverage_15": 0.13492192924022675,
|
|
"rewards/frontier_coverage_20": 0.1289721041917801,
|
|
"rewards/frontier_coverage_25": 0.08149012476205826,
|
|
"rewards/frontier_coverage_5": 0.13547399938106536,
|
|
"rewards/frontier_ece_reward": 0.0059300497174263,
|
|
"rewards/frontier_entropy_batch_reward": -0.13258121609687806,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09466552734375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12220577746629716,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047332763671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.047332763671875,
|
|
"signal/advantage_abs_mean": 0.0584713064134121,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0584713064134121,
|
|
"signal/advantage_pre_scale_std": 0.09739241003990173,
|
|
"signal/advantage_std": 0.09739241003990173,
|
|
"signal/brier_reward/centered_abs_mean": 0.12433077991008759,
|
|
"signal/brier_reward/group_bin_occupancy": 0.836328125,
|
|
"signal/brier_reward/group_std_mean": 0.15874851047992705,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0124330785125494,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0124330785125494,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014638883247971534,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.860546875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01877986006438732,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014638883760198951,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014638883760198951,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023107386427000166,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.734375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0037692871410399675,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8884234416182154e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8884234416182154e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1809740960597992,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22811082899570465,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022621762473136187,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022621762473136187,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1809740960597992,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22811082899570465,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022621762473136187,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022621762473136187,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18020015954971313,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8671875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22716450095176696,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002252502040937543,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002252502040937543,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18002530038356782,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8671875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22695587575435638,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022503164131194354,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022503164131194354,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17040481567382812,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.21481127440929412,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021300603169947863,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021300603169947863,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09835156053304672,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12501538395881653,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012293945765122771,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012293945765122771,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1809740960597992,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22811082899570465,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022621762473136187,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022621762473136187,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007257287390530109,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.640234375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00914002489298582,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007257287506945431,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007257287506945431,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1907802402973175,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.773828125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.25298523604869844,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019078024849295615,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019078024849295615,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2700117873695714,
|
|
"calibration/batch_distribution_entropy": 0.9849202351740087,
|
|
"calibration/batch_entropy_100bins": 0.9480692672613837,
|
|
"calibration/batch_entropy_10bins": 0.9849202351740087,
|
|
"calibration/batch_entropy_50bins": 0.9754065036177344,
|
|
"calibration/batch_uniqueness": 0.9661651611328125,
|
|
"calibration/buffer_distribution_entropy": 0.9984497746065746,
|
|
"calibration/buffer_entropy_100bins": 0.9940789054446035,
|
|
"calibration/buffer_entropy_10bins": 0.9984497746065746,
|
|
"calibration/buffer_entropy_50bins": 0.997180604698514,
|
|
"calibration/confidence_entropy": 0.5025687742045375,
|
|
"calibration/coverage@0%": 0.01484375,
|
|
"calibration/coverage@1%": 0.01484375,
|
|
"calibration/coverage@10%": 0.082421875,
|
|
"calibration/coverage@15%": 0.191015625,
|
|
"calibration/coverage@20%": 0.360546875,
|
|
"calibration/coverage@25%": 0.4765625,
|
|
"calibration/coverage@30%": 0.6328125,
|
|
"calibration/coverage@5%": 0.026171875,
|
|
"calibration/ece": 0.08137528389843751,
|
|
"calibration/mean_confidence": 0.523429369834375,
|
|
"calibration/prompt_uniqueness": 0.874169921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 522.2,
|
|
"completions/max_terminated_length": 522.2,
|
|
"completions/mean_length": 214.3845703125,
|
|
"completions/mean_terminated_length": 214.3845703125,
|
|
"completions/min_length": 103.0,
|
|
"completions/min_terminated_length": 103.0,
|
|
"epoch": 0.624,
|
|
"grad_norm": 0.0008414575131610036,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0,
|
|
"num_tokens": 660114544.0,
|
|
"reward": 0.9401260256767273,
|
|
"reward_std": 0.08299153149127961,
|
|
"rewards/accuracy_reward": 0.528125,
|
|
"rewards/brier_reward": 0.805389142036438,
|
|
"rewards/confidence_uniqueness_reward": 0.9662490844726562,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002919661020860076,
|
|
"rewards/frontier_coverage_0": 0.12498158812522889,
|
|
"rewards/frontier_coverage_1": 0.12498158812522889,
|
|
"rewards/frontier_coverage_10": 0.12457616329193115,
|
|
"rewards/frontier_coverage_15": 0.12416773438453674,
|
|
"rewards/frontier_coverage_20": 0.11661605983972549,
|
|
"rewards/frontier_coverage_25": 0.07004784420132637,
|
|
"rewards/frontier_coverage_5": 0.12498158812522889,
|
|
"rewards/frontier_ece_reward": 0.004868039395660162,
|
|
"rewards/frontier_entropy_batch_reward": -0.11680007576942444,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.099853515625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.173046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.13264059126377106,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.615625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0499267578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0499267578125,
|
|
"signal/advantage_abs_mean": 0.0640070766210556,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0640070766210556,
|
|
"signal/advantage_pre_scale_std": 0.1057273805141449,
|
|
"signal/advantage_std": 0.1057273805141449,
|
|
"signal/brier_reward/centered_abs_mean": 0.1260695680975914,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85234375,
|
|
"signal/brier_reward/group_std_mean": 0.16242826581001282,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012606956996023655,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012606956996023655,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012650418281555175,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.878515625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015894681215286255,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001265041856095195,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001265041856095195,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002833597734570503,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.696484375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0049308110028505325,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.541997357388027e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.541997357388027e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16888214647769928,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21781981885433196,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021110267844051123,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021110267844051123,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16888214647769928,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21781981885433196,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021110267844051123,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021110267844051123,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16817994117736818,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21693590581417083,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002102249301970005,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002102249301970005,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16733984053134918,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.873828125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21589226722717286,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020917480811476707,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020917480811476707,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15284331440925597,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19766626060009002,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019105415092781186,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019105415092781186,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08127258270978928,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8921875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10656125694513321,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010159073397517204,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010159073397517204,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16888214647769928,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21781981885433196,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021110267844051123,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021110267844051123,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006312453839927912,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.645703125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.008178574219346046,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006312454002909363,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006312454002909363,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18553299009799956,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.77421875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2506150871515274,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01855330020189285,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01855330020189285,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28188014168873426,
|
|
"calibration/batch_distribution_entropy": 0.9805733776248658,
|
|
"calibration/batch_entropy_100bins": 0.9462408809338638,
|
|
"calibration/batch_entropy_10bins": 0.9805733776248658,
|
|
"calibration/batch_entropy_50bins": 0.9735475700074046,
|
|
"calibration/batch_uniqueness": 0.96727294921875,
|
|
"calibration/buffer_distribution_entropy": 0.9985629674566393,
|
|
"calibration/buffer_entropy_100bins": 0.993793542280277,
|
|
"calibration/buffer_entropy_10bins": 0.9985629674566393,
|
|
"calibration/buffer_entropy_50bins": 0.9971571943470149,
|
|
"calibration/confidence_entropy": 0.5155245961834228,
|
|
"calibration/coverage@0%": 0.022265625,
|
|
"calibration/coverage@1%": 0.022265625,
|
|
"calibration/coverage@10%": 0.27109375,
|
|
"calibration/coverage@15%": 0.342578125,
|
|
"calibration/coverage@20%": 0.390234375,
|
|
"calibration/coverage@25%": 0.479296875,
|
|
"calibration/coverage@30%": 0.586328125,
|
|
"calibration/coverage@5%": 0.169140625,
|
|
"calibration/ece": 0.16515665708359378,
|
|
"calibration/mean_confidence": 0.5311759464164062,
|
|
"calibration/prompt_uniqueness": 0.88291015625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 764.2,
|
|
"completions/max_terminated_length": 574.0,
|
|
"completions/mean_length": 220.08583984375,
|
|
"completions/mean_terminated_length": 219.95709533691405,
|
|
"completions/min_length": 101.6,
|
|
"completions/min_terminated_length": 101.6,
|
|
"epoch": 0.64,
|
|
"grad_norm": 0.002240754896774888,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 677710911.0,
|
|
"reward": 0.9547593593597412,
|
|
"reward_std": 0.07518478035926819,
|
|
"rewards/accuracy_reward": 0.568359375,
|
|
"rewards/brier_reward": 0.8007413268089294,
|
|
"rewards/confidence_uniqueness_reward": 0.965564739704132,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002697795373387635,
|
|
"rewards/frontier_coverage_0": 0.0924006424844265,
|
|
"rewards/frontier_coverage_1": 0.0924006424844265,
|
|
"rewards/frontier_coverage_10": 0.09219776839017868,
|
|
"rewards/frontier_coverage_15": 0.09181699305772781,
|
|
"rewards/frontier_coverage_20": 0.08537925407290459,
|
|
"rewards/frontier_coverage_25": 0.0547153040766716,
|
|
"rewards/frontier_coverage_5": 0.0924006424844265,
|
|
"rewards/frontier_ece_reward": 0.004436677880585193,
|
|
"rewards/frontier_entropy_batch_reward": -0.1387961506843567,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07882080078125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.164453125,
|
|
"signal/accuracy_reward/group_std_mean": 0.10743281841278077,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.684375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039410400390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.039410400390625,
|
|
"signal/advantage_abs_mean": 0.05716605037450791,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05716605037450791,
|
|
"signal/advantage_pre_scale_std": 0.09739655405282974,
|
|
"signal/advantage_std": 0.09739655405282974,
|
|
"signal/brier_reward/centered_abs_mean": 0.1214315801858902,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8515625,
|
|
"signal/brier_reward/group_std_mean": 0.15803540349006653,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012143158353865147,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012143158353865147,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012997383438050746,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.862109375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017083115130662917,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012997383950278164,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012997383950278164,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027137193828821182,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.695703125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004572696890681982,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.392149301362224e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.392149301362224e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15220192968845367,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19722159206867218,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019025241024792194,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019025241024792194,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15220192968845367,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19722159206867218,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019025241024792194,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019025241024792194,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15184370577335357,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.88046875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19676703512668609,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018980463733896612,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018980463733896612,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15081839263439178,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.880078125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19544895887374877,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018852299312129618,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018852299312129618,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13319507688283921,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.17282224893569947,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016649385681375862,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016649385681375862,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06962503343820572,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8953125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09113402217626572,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000870312936604023,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000870312936604023,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15220192968845367,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19722159206867218,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019025241024792194,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019025241024792194,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006119345035403967,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6296875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007958116941154003,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006119344965554774,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006119344965554774,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19946256577968596,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.26395664513111117,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019946256838738918,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019946256838738918,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"eval_calibration/aurc": 0.45046525025940026,
|
|
"eval_calibration/batch_distribution_entropy": 0.9294361395739166,
|
|
"eval_calibration/batch_entropy_100bins": 0.7037637102944632,
|
|
"eval_calibration/batch_entropy_10bins": 0.9294361395739166,
|
|
"eval_calibration/batch_entropy_50bins": 0.7976394251687032,
|
|
"eval_calibration/batch_uniqueness": 0.9111328125,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9986805045240452,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9934918532172449,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9986805045240452,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9971155639822349,
|
|
"eval_calibration/confidence_entropy": 0.4882177446761135,
|
|
"eval_calibration/coverage@0%": 0.0859375,
|
|
"eval_calibration/coverage@1%": 0.0859375,
|
|
"eval_calibration/coverage@10%": 0.1328125,
|
|
"eval_calibration/coverage@15%": 0.1328125,
|
|
"eval_calibration/coverage@20%": 0.140625,
|
|
"eval_calibration/coverage@25%": 0.171875,
|
|
"eval_calibration/coverage@30%": 0.1796875,
|
|
"eval_calibration/coverage@5%": 0.0859375,
|
|
"eval_calibration/ece": 0.231015625,
|
|
"eval_calibration/mean_confidence": 0.483984375,
|
|
"eval_calibration/prompt_uniqueness": 0.9111328125,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 421.25,
|
|
"eval_completions/max_terminated_length": 421.25,
|
|
"eval_completions/mean_length": 223.7101287841797,
|
|
"eval_completions/mean_terminated_length": 223.7101287841797,
|
|
"eval_completions/min_length": 122.5,
|
|
"eval_completions/min_terminated_length": 122.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 677710911.0,
|
|
"eval_reward": 0.8325561285018921,
|
|
"eval_reward_std": 0.23123450949788094,
|
|
"eval_rewards/accuracy_reward": 0.421875,
|
|
"eval_rewards/brier_reward": 0.7981202453374863,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.9072265625,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.003328263759613037,
|
|
"eval_rewards/frontier_coverage_0": 0.183719702064991,
|
|
"eval_rewards/frontier_coverage_1": 0.183719702064991,
|
|
"eval_rewards/frontier_coverage_10": 0.1834387667477131,
|
|
"eval_rewards/frontier_coverage_15": 0.18294651806354523,
|
|
"eval_rewards/frontier_coverage_20": 0.16035383194684982,
|
|
"eval_rewards/frontier_coverage_25": 0.0812362264841795,
|
|
"eval_rewards/frontier_coverage_5": 0.183719702064991,
|
|
"eval_rewards/frontier_ece_reward": 0.004539699875749648,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.6381759643554688,
|
|
"eval_runtime": 21.1259,
|
|
"eval_samples_per_second": 23.668,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.470703125,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4925759807229042,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2353515625,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2353515625,
|
|
"eval_signal/advantage_abs_mean": 0.21354259178042412,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21354259178042412,
|
|
"eval_signal/advantage_pre_scale_std": 0.2286580204963684,
|
|
"eval_signal/advantage_std": 0.2286580204963684,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.17807075753808022,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.84375,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2285139560699463,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017807076685130596,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.017807076685130596,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0390625,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.047283546067774296,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003906250116415322,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003906250116415322,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004127664549741894,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.640625,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008040638058446348,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1595807235571556e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1595807235571556e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.33542679250240326,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.41075549274683,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004192834836430848,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004192834836430848,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.33542679250240326,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.41075549274683,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004192834836430848,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004192834836430848,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.33442478626966476,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4096386879682541,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0041803098283708096,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0041803098283708096,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3335842937231064,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4086746945977211,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004169803811237216,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004169803811237216,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.28882090747356415,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.3565327152609825,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00361026159953326,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00361026159953326,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.1293876338750124,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9453125,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.16918417811393738,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016173454350791872,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016173454350791872,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.33542679250240326,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.41075549274683,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004192834836430848,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004192834836430848,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.007051818422041833,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8671875,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.009225662797689438,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007051818392938003,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007051818392938003,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.332033634185791,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.3422466069459915,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0332033634185791,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0332033634185791,
|
|
"eval_steps_per_second": 0.189,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.41416409000016474,
|
|
"calibration/batch_distribution_entropy": 0.9835108836589204,
|
|
"calibration/batch_entropy_100bins": 0.9492298170911946,
|
|
"calibration/batch_entropy_10bins": 0.9835108836589204,
|
|
"calibration/batch_entropy_50bins": 0.9746778983389239,
|
|
"calibration/batch_uniqueness": 0.964678955078125,
|
|
"calibration/buffer_distribution_entropy": 0.9987585634745246,
|
|
"calibration/buffer_entropy_100bins": 0.9932715162674421,
|
|
"calibration/buffer_entropy_10bins": 0.9987585634745246,
|
|
"calibration/buffer_entropy_50bins": 0.9970967811961777,
|
|
"calibration/confidence_entropy": 0.5169030002642951,
|
|
"calibration/coverage@0%": 0.00234375,
|
|
"calibration/coverage@1%": 0.00234375,
|
|
"calibration/coverage@10%": 0.00625,
|
|
"calibration/coverage@15%": 0.022265625,
|
|
"calibration/coverage@20%": 0.051953125,
|
|
"calibration/coverage@25%": 0.156640625,
|
|
"calibration/coverage@30%": 0.269140625,
|
|
"calibration/coverage@5%": 0.00234375,
|
|
"calibration/ece": 0.11185393470703127,
|
|
"calibration/mean_confidence": 0.47490546869140626,
|
|
"calibration/prompt_uniqueness": 0.87470703125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 788.8,
|
|
"completions/max_terminated_length": 584.2,
|
|
"completions/mean_length": 216.35341796875,
|
|
"completions/mean_terminated_length": 216.2248046875,
|
|
"completions/min_length": 97.6,
|
|
"completions/min_terminated_length": 97.6,
|
|
"epoch": 0.656,
|
|
"grad_norm": 0.0008305140654556453,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 694782914.0,
|
|
"reward": 0.9230387210845947,
|
|
"reward_std": 0.08519956022500992,
|
|
"rewards/accuracy_reward": 0.50146484375,
|
|
"rewards/brier_reward": 0.7805219888687134,
|
|
"rewards/confidence_uniqueness_reward": 0.9658442378044129,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0030296589247882366,
|
|
"rewards/frontier_coverage_0": 0.11411752551794052,
|
|
"rewards/frontier_coverage_1": 0.11411752551794052,
|
|
"rewards/frontier_coverage_10": 0.113961161673069,
|
|
"rewards/frontier_coverage_15": 0.11310399323701859,
|
|
"rewards/frontier_coverage_20": 0.10050597786903381,
|
|
"rewards/frontier_coverage_25": 0.05612687692046166,
|
|
"rewards/frontier_coverage_5": 0.11411752551794052,
|
|
"rewards/frontier_ece_reward": 0.0036422216799110174,
|
|
"rewards/frontier_entropy_batch_reward": -0.11683483868837356,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.101885986328125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1349509835243225,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0509429931640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0509429931640625,
|
|
"signal/advantage_abs_mean": 0.06606777310371399,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06606777310371399,
|
|
"signal/advantage_pre_scale_std": 0.10768669247627258,
|
|
"signal/advantage_std": 0.10768669247627258,
|
|
"signal/brier_reward/centered_abs_mean": 0.1326069116592407,
|
|
"signal/brier_reward/group_bin_occupancy": 0.855859375,
|
|
"signal/brier_reward/group_std_mean": 0.1697738140821457,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013260690867900849,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013260690867900849,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013172254525125026,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.85703125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017005473747849463,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013172254897654057,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013172254897654057,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002756138565018773,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.70078125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004585301177576185,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4451731698936784e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4451731698936784e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1743350952863693,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.880859375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22367313802242278,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021791885839775203,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021791885839775203,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1743350952863693,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.880859375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22367313802242278,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021791885839775203,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021791885839775203,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17374208867549895,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22294133603572847,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021717761643230914,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021717761643230914,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17231329381465912,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.880078125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22113268971443176,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021539161913096904,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021539161913096904,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15182446539402009,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8765625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.195146906375885,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018978057894855737,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018978057894855737,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07687772065401077,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.902734375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09964980781078339,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000960971531458199,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000960971531458199,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1743350952863693,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.880859375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22367313802242278,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021791885839775203,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021791885839775203,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005466759670525789,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.666015625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007180223613977433,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005466759903356433,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005466759903356433,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18491021990776063,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75234375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2515652894973755,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018491022288799286,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018491022288799286,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30381564409048867,
|
|
"calibration/batch_distribution_entropy": 0.9791813887040892,
|
|
"calibration/batch_entropy_100bins": 0.9456414506810381,
|
|
"calibration/batch_entropy_10bins": 0.9791813887040892,
|
|
"calibration/batch_entropy_50bins": 0.970168505212247,
|
|
"calibration/batch_uniqueness": 0.961700439453125,
|
|
"calibration/buffer_distribution_entropy": 0.9989108611243351,
|
|
"calibration/buffer_entropy_100bins": 0.9926973216876972,
|
|
"calibration/buffer_entropy_10bins": 0.9989108611243351,
|
|
"calibration/buffer_entropy_50bins": 0.9970219902732639,
|
|
"calibration/confidence_entropy": 0.4907718157996378,
|
|
"calibration/coverage@0%": 0.008203125,
|
|
"calibration/coverage@1%": 0.008203125,
|
|
"calibration/coverage@10%": 0.10078125,
|
|
"calibration/coverage@15%": 0.197265625,
|
|
"calibration/coverage@20%": 0.26953125,
|
|
"calibration/coverage@25%": 0.36171875,
|
|
"calibration/coverage@30%": 0.4375,
|
|
"calibration/coverage@5%": 0.044921875,
|
|
"calibration/ece": 0.13147094825039063,
|
|
"calibration/mean_confidence": 0.4795633160871094,
|
|
"calibration/prompt_uniqueness": 0.862939453125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 779.8,
|
|
"completions/max_terminated_length": 566.2,
|
|
"completions/mean_length": 213.07626953125,
|
|
"completions/mean_terminated_length": 212.94696350097655,
|
|
"completions/min_length": 101.2,
|
|
"completions/min_terminated_length": 101.2,
|
|
"epoch": 0.672,
|
|
"grad_norm": 0.000700996839441359,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 711878255.0,
|
|
"reward": 0.9307149410247803,
|
|
"reward_std": 0.07861108779907226,
|
|
"rewards/accuracy_reward": 0.5162109375,
|
|
"rewards/brier_reward": 0.7959470629692078,
|
|
"rewards/confidence_uniqueness_reward": 0.961796760559082,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002726683299988508,
|
|
"rewards/frontier_coverage_0": 0.1381845772266388,
|
|
"rewards/frontier_coverage_1": 0.1381845772266388,
|
|
"rewards/frontier_coverage_10": 0.13760078251361846,
|
|
"rewards/frontier_coverage_15": 0.13690231442451478,
|
|
"rewards/frontier_coverage_20": 0.12530189156532287,
|
|
"rewards/frontier_coverage_25": 0.07160148993134499,
|
|
"rewards/frontier_coverage_5": 0.1381845772266388,
|
|
"rewards/frontier_ece_reward": 0.004131518257781863,
|
|
"rewards/frontier_entropy_batch_reward": -0.1456966444849968,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09827880859375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12960606515407563,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049139404296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049139404296875,
|
|
"signal/advantage_abs_mean": 0.06056781709194183,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06056781709194183,
|
|
"signal/advantage_pre_scale_std": 0.09958604127168655,
|
|
"signal/advantage_std": 0.09958604127168655,
|
|
"signal/brier_reward/centered_abs_mean": 0.13191191256046295,
|
|
"signal/brier_reward/group_bin_occupancy": 0.82578125,
|
|
"signal/brier_reward/group_std_mean": 0.16879949271678923,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013191192038357257,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013191192038357257,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0151775436475873,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.84765625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01969280615448952,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00151775439735502,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00151775439735502,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025179087184369563,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.713671875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00425442517735064,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.147385905322153e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.147385905322153e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18835416436195374,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.852734375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.239266437292099,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002354427147656679,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002354427147656679,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18835416436195374,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.852734375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.239266437292099,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002354427147656679,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002354427147656679,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18741922974586486,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.853125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.238098081946373,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002342740399762988,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002342740399762988,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18593138456344604,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8546875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23624544441699982,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023241423536092044,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023241423536092044,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16277650594711304,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.848828125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20805872082710267,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020347062963992357,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020347062963992357,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08195126354694367,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.888671875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10580885410308838,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010243908269330858,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010243908269330858,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18835416436195374,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.852734375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.239266437292099,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002354427147656679,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002354427147656679,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005728235561400652,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.650390625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00733137084171176,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005728235701099038,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005728235701099038,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1957707315683365,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7546875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2600939005613327,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01957707367837429,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01957707367837429,
|
|
"step": 210
|
|
},
|
|
{
|
|
"calibration/aurc": 0.35507882344852365,
|
|
"calibration/batch_distribution_entropy": 0.9815242485981495,
|
|
"calibration/batch_entropy_100bins": 0.9487245023983595,
|
|
"calibration/batch_entropy_10bins": 0.9815242485981495,
|
|
"calibration/batch_entropy_50bins": 0.9749153032528938,
|
|
"calibration/batch_uniqueness": 0.9653167724609375,
|
|
"calibration/buffer_distribution_entropy": 0.9989997128991707,
|
|
"calibration/buffer_entropy_100bins": 0.9919129656798372,
|
|
"calibration/buffer_entropy_10bins": 0.9989997128991707,
|
|
"calibration/buffer_entropy_50bins": 0.99690645212335,
|
|
"calibration/confidence_entropy": 0.5100705905072789,
|
|
"calibration/coverage@0%": 0.001953125,
|
|
"calibration/coverage@1%": 0.001953125,
|
|
"calibration/coverage@10%": 0.08671875,
|
|
"calibration/coverage@15%": 0.11640625,
|
|
"calibration/coverage@20%": 0.165234375,
|
|
"calibration/coverage@25%": 0.349609375,
|
|
"calibration/coverage@30%": 0.53671875,
|
|
"calibration/coverage@5%": 0.001953125,
|
|
"calibration/ece": 0.13693945359140625,
|
|
"calibration/mean_confidence": 0.48373966934374996,
|
|
"calibration/prompt_uniqueness": 0.875537109375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 605.0,
|
|
"completions/max_terminated_length": 605.0,
|
|
"completions/mean_length": 214.97587890625,
|
|
"completions/mean_terminated_length": 214.97587890625,
|
|
"completions/min_length": 101.6,
|
|
"completions/min_terminated_length": 101.6,
|
|
"epoch": 0.688,
|
|
"grad_norm": 0.0009223796660080552,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 729033528.0,
|
|
"reward": 0.9396338343620301,
|
|
"reward_std": 0.07829283773899079,
|
|
"rewards/accuracy_reward": 0.5333984375,
|
|
"rewards/brier_reward": 0.7883359670639039,
|
|
"rewards/confidence_uniqueness_reward": 0.9647911071777344,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0029629127122461794,
|
|
"rewards/frontier_coverage_0": 0.10885621532797814,
|
|
"rewards/frontier_coverage_1": 0.10885621532797814,
|
|
"rewards/frontier_coverage_10": 0.10867343470454216,
|
|
"rewards/frontier_coverage_15": 0.10741532370448112,
|
|
"rewards/frontier_coverage_20": 0.09064158499240875,
|
|
"rewards/frontier_coverage_25": 0.05339468345046043,
|
|
"rewards/frontier_coverage_5": 0.10885621532797814,
|
|
"rewards/frontier_ece_reward": 0.0036856223829090594,
|
|
"rewards/frontier_entropy_batch_reward": -0.11293288618326187,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09931640625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1314438134431839,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049658203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049658203125,
|
|
"signal/advantage_abs_mean": 0.06019651964306831,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06019651964306831,
|
|
"signal/advantage_pre_scale_std": 0.10152793973684311,
|
|
"signal/advantage_std": 0.10152793973684311,
|
|
"signal/brier_reward/centered_abs_mean": 0.12910507768392562,
|
|
"signal/brier_reward/group_bin_occupancy": 0.839453125,
|
|
"signal/brier_reward/group_std_mean": 0.16621757447719573,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012910507991909981,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012910507991909981,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013120555877685547,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8921875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016461556777358055,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001312055578455329,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001312055578455329,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002895374782383442,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.705859375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0050206175073981285,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6192184779793024e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6192184779793024e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17723233103752137,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.862109375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22633326649665833,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022154041100293396,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022154041100293396,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17723233103752137,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.862109375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22633326649665833,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022154041100293396,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022154041100293396,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17641493380069734,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.861328125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2253005772829056,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022051867563277483,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022051867563277483,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17494137585163116,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.861328125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2234204888343811,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021867671981453895,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021867671981453895,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.14532059729099273,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.855859375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.18619788587093353,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001816507545299828,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001816507545299828,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07450771033763885,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9078125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09580143839120865,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009313463466241956,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009313463466241956,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17723233103752137,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.862109375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22633326649665833,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022154041100293396,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022154041100293396,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005490542016923427,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.653515625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006997937150299549,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005490542040206492,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005490542040206492,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17429947555065156,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7765625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2336766630411148,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017429948039352893,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017429948039352893,
|
|
"step": 215
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25269958125350817,
|
|
"calibration/batch_distribution_entropy": 0.977598008234402,
|
|
"calibration/batch_entropy_100bins": 0.9456781551973619,
|
|
"calibration/batch_entropy_10bins": 0.977598008234402,
|
|
"calibration/batch_entropy_50bins": 0.972391770560862,
|
|
"calibration/batch_uniqueness": 0.964692861372854,
|
|
"calibration/buffer_distribution_entropy": 0.9990104211633648,
|
|
"calibration/buffer_entropy_100bins": 0.9910756654810251,
|
|
"calibration/buffer_entropy_10bins": 0.9990104211633648,
|
|
"calibration/buffer_entropy_50bins": 0.9967262526332157,
|
|
"calibration/confidence_entropy": 0.47842673135918395,
|
|
"calibration/coverage@0%": 0.006640625,
|
|
"calibration/coverage@1%": 0.006640625,
|
|
"calibration/coverage@10%": 0.081640625,
|
|
"calibration/coverage@15%": 0.21738243028375734,
|
|
"calibration/coverage@20%": 0.3651242967221135,
|
|
"calibration/coverage@25%": 0.5608656433463797,
|
|
"calibration/coverage@30%": 0.7155829562133073,
|
|
"calibration/coverage@5%": 0.006640625,
|
|
"calibration/ece": 0.08634725362059685,
|
|
"calibration/mean_confidence": 0.5315407554914384,
|
|
"calibration/prompt_uniqueness": 0.8584324800013008,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 998.4,
|
|
"completions/max_terminated_length": 575.6,
|
|
"completions/mean_length": 211.2060546875,
|
|
"completions/mean_terminated_length": 210.94761962890624,
|
|
"completions/min_length": 99.4,
|
|
"completions/min_terminated_length": 99.4,
|
|
"epoch": 0.704,
|
|
"grad_norm": 0.000813114398624748,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 746062422.0,
|
|
"reward": 0.9449209213256836,
|
|
"reward_std": 0.07712907642126084,
|
|
"rewards/accuracy_reward": 0.54228515625,
|
|
"rewards/brier_reward": 0.8041618227958679,
|
|
"rewards/confidence_uniqueness_reward": 0.9660219669342041,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0028705994598567488,
|
|
"rewards/frontier_coverage_0": 0.11365007087588311,
|
|
"rewards/frontier_coverage_1": 0.11365007087588311,
|
|
"rewards/frontier_coverage_10": 0.11318954974412918,
|
|
"rewards/frontier_coverage_15": 0.1123675525188446,
|
|
"rewards/frontier_coverage_20": 0.09421491771936416,
|
|
"rewards/frontier_coverage_25": 0.05829355418682099,
|
|
"rewards/frontier_coverage_5": 0.11365007087588311,
|
|
"rewards/frontier_ece_reward": 0.004210776835680008,
|
|
"rewards/frontier_entropy_batch_reward": -0.12466455399990081,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.089410400390625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1160278245806694,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0447052001953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0447052001953125,
|
|
"signal/advantage_abs_mean": 0.06003868728876114,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06003868728876114,
|
|
"signal/advantage_pre_scale_std": 0.10074197500944138,
|
|
"signal/advantage_std": 0.10074197500944138,
|
|
"signal/brier_reward/centered_abs_mean": 0.12351735383272171,
|
|
"signal/brier_reward/group_bin_occupancy": 0.848046875,
|
|
"signal/brier_reward/group_std_mean": 0.15841708183288575,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012351735681295394,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012351735681295394,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012723441608250141,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8484375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01709325034171343,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012723441468551755,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012723441468551755,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028855173382908105,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71953125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004696205072104931,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.606896862038411e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.606896862038411e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15936529040336608,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20337989330291747,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00199206608813256,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00199206608813256,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15936529040336608,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20337989330291747,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00199206608813256,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00199206608813256,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15849925875663756,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.864453125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2023030012845993,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019812406972050667,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019812406972050667,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15680376291275025,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.863671875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2001950114965439,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001960047124885023,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001960047124885023,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.12259230017662048,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.848046875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.15736171305179597,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015324037754908203,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015324037754908203,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06701800152659416,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.911328125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08619940429925918,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008377250749617815,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008377250749617815,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15936529040336608,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20337989330291747,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00199206608813256,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00199206608813256,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0054263660684227945,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.649609375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006871442683041096,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005426366347819567,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005426366347819567,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17846384644508362,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76328125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.23761946856975555,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01784638427197933,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01784638427197933,
|
|
"step": 220
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24251946895628765,
|
|
"calibration/batch_distribution_entropy": 0.9893663529287997,
|
|
"calibration/batch_entropy_100bins": 0.9515162266162939,
|
|
"calibration/batch_entropy_10bins": 0.9893663529287997,
|
|
"calibration/batch_entropy_50bins": 0.9806845012920032,
|
|
"calibration/batch_uniqueness": 0.9671478271484375,
|
|
"calibration/buffer_distribution_entropy": 0.9989959337433355,
|
|
"calibration/buffer_entropy_100bins": 0.9900516903881215,
|
|
"calibration/buffer_entropy_10bins": 0.9989959337433355,
|
|
"calibration/buffer_entropy_50bins": 0.9964595474824691,
|
|
"calibration/confidence_entropy": 0.49330079316439557,
|
|
"calibration/coverage@0%": 0.040234375,
|
|
"calibration/coverage@1%": 0.05234375,
|
|
"calibration/coverage@10%": 0.209765625,
|
|
"calibration/coverage@15%": 0.328515625,
|
|
"calibration/coverage@20%": 0.45078125,
|
|
"calibration/coverage@25%": 0.54765625,
|
|
"calibration/coverage@30%": 0.640625,
|
|
"calibration/coverage@5%": 0.1265625,
|
|
"calibration/ece": 0.12774077019609376,
|
|
"calibration/mean_confidence": 0.5107396985539063,
|
|
"calibration/prompt_uniqueness": 0.870361328125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 466.6,
|
|
"completions/max_terminated_length": 466.6,
|
|
"completions/mean_length": 209.7314453125,
|
|
"completions/mean_terminated_length": 209.7314453125,
|
|
"completions/min_length": 95.4,
|
|
"completions/min_terminated_length": 95.4,
|
|
"epoch": 0.72,
|
|
"grad_norm": 0.0009440298308618367,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 763219928.0,
|
|
"reward": 0.9544408559799195,
|
|
"reward_std": 0.07761229127645493,
|
|
"rewards/accuracy_reward": 0.56044921875,
|
|
"rewards/brier_reward": 0.8082213044166565,
|
|
"rewards/confidence_uniqueness_reward": 0.9663070678710938,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0023990374989807605,
|
|
"rewards/frontier_coverage_0": 0.10608717054128647,
|
|
"rewards/frontier_coverage_1": 0.10608717054128647,
|
|
"rewards/frontier_coverage_10": 0.10574176013469697,
|
|
"rewards/frontier_coverage_15": 0.10523617118597031,
|
|
"rewards/frontier_coverage_20": 0.08412261456251144,
|
|
"rewards/frontier_coverage_25": 0.05517871528863907,
|
|
"rewards/frontier_coverage_5": 0.10608717054128647,
|
|
"rewards/frontier_ece_reward": 0.00386471445672214,
|
|
"rewards/frontier_entropy_batch_reward": -0.119498211145401,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.094354248046875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.171484375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12810063362121582,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0471771240234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0471771240234375,
|
|
"signal/advantage_abs_mean": 0.05871965810656547,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05871965810656547,
|
|
"signal/advantage_pre_scale_std": 0.09942338019609451,
|
|
"signal/advantage_std": 0.09942338019609451,
|
|
"signal/brier_reward/centered_abs_mean": 0.11699345856904983,
|
|
"signal/brier_reward/group_bin_occupancy": 0.856640625,
|
|
"signal/brier_reward/group_std_mean": 0.150360769033432,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011699345521628856,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011699345521628856,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012291359901428222,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8765625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015424015931785107,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012291359947994352,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012291359947994352,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023271431447938083,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.716796875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003916347119957209,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.908928909164388e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.908928909164388e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16398767232894898,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2096702426671982,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002049846015870571,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002049846015870571,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16398767232894898,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2096702426671982,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002049846015870571,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002049846015870571,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16326985955238343,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20876802504062653,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020408732816576958,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020408732816576958,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1617922306060791,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20689028203487397,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002022402756847441,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002022402756847441,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1167424589395523,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.858984375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.15021034181118012,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014592807507142424,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014592807507142424,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06388061791658402,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.919921875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08200441002845764,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007985077565535903,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007985077565535903,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16398767232894898,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2096702426671982,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002049846015870571,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002049846015870571,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004942002054303884,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.648828125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006254712212830782,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004942002124153078,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004942002124153078,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1822981506586075,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.770703125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24075571000576018,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018229815922677518,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018229815922677518,
|
|
"step": 225
|
|
},
|
|
{
|
|
"calibration/aurc": 0.265978066281768,
|
|
"calibration/batch_distribution_entropy": 0.9789357360503427,
|
|
"calibration/batch_entropy_100bins": 0.9431321978969809,
|
|
"calibration/batch_entropy_10bins": 0.9789357360503427,
|
|
"calibration/batch_entropy_50bins": 0.9697579699878605,
|
|
"calibration/batch_uniqueness": 0.9652801513671875,
|
|
"calibration/buffer_distribution_entropy": 0.9990033919158412,
|
|
"calibration/buffer_entropy_100bins": 0.9889349207161073,
|
|
"calibration/buffer_entropy_10bins": 0.9990033919158412,
|
|
"calibration/buffer_entropy_50bins": 0.9962503665279716,
|
|
"calibration/confidence_entropy": 0.48441087660579196,
|
|
"calibration/coverage@0%": 0.012109375,
|
|
"calibration/coverage@1%": 0.012109375,
|
|
"calibration/coverage@10%": 0.071875,
|
|
"calibration/coverage@15%": 0.2671875,
|
|
"calibration/coverage@20%": 0.358984375,
|
|
"calibration/coverage@25%": 0.51796875,
|
|
"calibration/coverage@30%": 0.63671875,
|
|
"calibration/coverage@5%": 0.030859375,
|
|
"calibration/ece": 0.11656946337539065,
|
|
"calibration/mean_confidence": 0.5445783087582032,
|
|
"calibration/prompt_uniqueness": 0.864501953125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 652.8,
|
|
"completions/max_terminated_length": 652.8,
|
|
"completions/mean_length": 206.99716796875,
|
|
"completions/mean_terminated_length": 206.99716796875,
|
|
"completions/min_length": 96.0,
|
|
"completions/min_terminated_length": 96.0,
|
|
"epoch": 0.736,
|
|
"grad_norm": 0.0008532933425158262,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 780279163.0,
|
|
"reward": 0.9536020278930664,
|
|
"reward_std": 0.07359256446361542,
|
|
"rewards/accuracy_reward": 0.55859375,
|
|
"rewards/brier_reward": 0.7990551352500915,
|
|
"rewards/confidence_uniqueness_reward": 0.9658401489257813,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002780623361468315,
|
|
"rewards/frontier_coverage_0": 0.10266576707363129,
|
|
"rewards/frontier_coverage_1": 0.10266576707363129,
|
|
"rewards/frontier_coverage_10": 0.10229146480560303,
|
|
"rewards/frontier_coverage_15": 0.10120062232017517,
|
|
"rewards/frontier_coverage_20": 0.08240518420934677,
|
|
"rewards/frontier_coverage_25": 0.055143621563911435,
|
|
"rewards/frontier_coverage_5": 0.10260389745235443,
|
|
"rewards/frontier_ece_reward": 0.003426346043124795,
|
|
"rewards/frontier_entropy_batch_reward": -0.10604460686445236,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08675537109375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.166015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11478563249111176,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.043377685546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.043377685546875,
|
|
"signal/advantage_abs_mean": 0.056449040025472644,
|
|
"signal/advantage_pre_scale_abs_mean": 0.056449040025472644,
|
|
"signal/advantage_pre_scale_std": 0.09533527195453644,
|
|
"signal/advantage_std": 0.09533527195453644,
|
|
"signal/brier_reward/centered_abs_mean": 0.12223577499389648,
|
|
"signal/brier_reward/group_bin_occupancy": 0.841015625,
|
|
"signal/brier_reward/group_std_mean": 0.15614095330238342,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012223577871918679,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012223577871918679,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012729287147521973,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.867578125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016052869893610478,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012729287147521973,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012729287147521973,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026509141782298682,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72109375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004276033490896225,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.313642882858403e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.313642882858403e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1618587166070938,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.865234375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20602332055568695,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002023234078660607,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002023234078660607,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1618587166070938,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.865234375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20602332055568695,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002023234078660607,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002023234078660607,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16095443964004516,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.865234375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2048912912607193,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020119305001571773,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020119305001571773,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15913594663143157,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.865234375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2025930851697922,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019891994539648294,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019891994539648294,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11051186323165893,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.865234375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14187619388103484,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013813983183354138,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013813983183354138,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06287488490343093,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.921484375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08044356256723403,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007859360543079674,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007859360543079674,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16175731718540193,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20589107573032378,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00202196657191962,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00202196657191962,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004867816995829344,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.647265625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.006159187015146017,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00048678170423954725,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00048678170423954725,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.16900931298732758,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.761328125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.23038658797740935,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.016900931484997272,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.016900931484997272,
|
|
"step": 230
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28684687224873423,
|
|
"calibration/batch_distribution_entropy": 0.9762817415979625,
|
|
"calibration/batch_entropy_100bins": 0.9470552678321782,
|
|
"calibration/batch_entropy_10bins": 0.9762817415979625,
|
|
"calibration/batch_entropy_50bins": 0.9694219542495217,
|
|
"calibration/batch_uniqueness": 0.9615577406598019,
|
|
"calibration/buffer_distribution_entropy": 0.9989136260467595,
|
|
"calibration/buffer_entropy_100bins": 0.9878533321218999,
|
|
"calibration/buffer_entropy_10bins": 0.9989136260467595,
|
|
"calibration/buffer_entropy_50bins": 0.995948725002774,
|
|
"calibration/confidence_entropy": 0.4667618109383035,
|
|
"calibration/coverage@0%": 0.009766389432485322,
|
|
"calibration/coverage@1%": 0.009766389432485322,
|
|
"calibration/coverage@10%": 0.11367263943248532,
|
|
"calibration/coverage@15%": 0.20313723091976515,
|
|
"calibration/coverage@20%": 0.3556522137964775,
|
|
"calibration/coverage@25%": 0.4596180895303327,
|
|
"calibration/coverage@30%": 0.5514562438845401,
|
|
"calibration/coverage@5%": 0.009766389432485322,
|
|
"calibration/ece": 0.1238677625992218,
|
|
"calibration/mean_confidence": 0.46819689665010245,
|
|
"calibration/prompt_uniqueness": 0.8621210104220864,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 685.6,
|
|
"completions/max_terminated_length": 473.4,
|
|
"completions/mean_length": 205.50390625,
|
|
"completions/mean_terminated_length": 205.37394104003906,
|
|
"completions/min_length": 91.8,
|
|
"completions/min_terminated_length": 91.8,
|
|
"epoch": 0.752,
|
|
"grad_norm": 0.0009124129428528249,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 797610723.0,
|
|
"reward": 0.951321005821228,
|
|
"reward_std": 0.07593754529953003,
|
|
"rewards/accuracy_reward": 0.56328125,
|
|
"rewards/brier_reward": 0.794294559955597,
|
|
"rewards/confidence_uniqueness_reward": 0.9645552635192871,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0026896405033767223,
|
|
"rewards/frontier_coverage_0": 0.09672394786030054,
|
|
"rewards/frontier_coverage_1": 0.09672394786030054,
|
|
"rewards/frontier_coverage_10": 0.09659410417079925,
|
|
"rewards/frontier_coverage_15": 0.09543101899325848,
|
|
"rewards/frontier_coverage_20": 0.0684605619404465,
|
|
"rewards/frontier_coverage_25": 0.04791465476155281,
|
|
"rewards/frontier_coverage_5": 0.09664845261722803,
|
|
"rewards/frontier_ece_reward": 0.0033146409783512353,
|
|
"rewards/frontier_entropy_batch_reward": -0.1393482729792595,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0828857421875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1134518638253212,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04144287109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04144287109375,
|
|
"signal/advantage_abs_mean": 0.057659880816936494,
|
|
"signal/advantage_pre_scale_abs_mean": 0.057659880816936494,
|
|
"signal/advantage_pre_scale_std": 0.09769354313611985,
|
|
"signal/advantage_std": 0.09769354313611985,
|
|
"signal/brier_reward/centered_abs_mean": 0.11685173511505127,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8453125,
|
|
"signal/brier_reward/group_std_mean": 0.15071351826190948,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011685173958539963,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011685173958539963,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013762599974870681,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87109375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017685002461075783,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013762600487098099,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013762600487098099,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002494478039443493,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71171875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0040936945006251335,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.118097665719688e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.118097665719688e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1533576190471649,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.87265625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19774354100227357,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00191697021946311,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00191697021946311,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1533576190471649,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.87265625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19774354100227357,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00191697021946311,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00191697021946311,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15258175432682036,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.871484375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19675518870353698,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019072720315307379,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019072720315307379,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15079548060894013,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19448258876800537,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018849435495212675,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018849435495212675,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10275738835334777,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.13350152522325515,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012844673823565246,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012844673823565246,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.060180126875638965,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.918359375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07750741690397263,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007522516185417772,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007522516185417772,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15328127443790435,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.87265625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19764436781406403,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019160159630700947,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019160159630700947,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004672563914209604,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.64765625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005903707630932331,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004672564100474119,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004672564100474119,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2062768131494522,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.739453125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2728250831365585,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02062768116593361,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02062768116593361,
|
|
"step": 235
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27721551684829293,
|
|
"calibration/batch_distribution_entropy": 0.9814844594182706,
|
|
"calibration/batch_entropy_100bins": 0.9523715157977393,
|
|
"calibration/batch_entropy_10bins": 0.9814844594182706,
|
|
"calibration/batch_entropy_50bins": 0.9751030931904451,
|
|
"calibration/batch_uniqueness": 0.9663785107122871,
|
|
"calibration/buffer_distribution_entropy": 0.9989660116076141,
|
|
"calibration/buffer_entropy_100bins": 0.9868567401278003,
|
|
"calibration/buffer_entropy_10bins": 0.9989660116076141,
|
|
"calibration/buffer_entropy_50bins": 0.9958389798404077,
|
|
"calibration/confidence_entropy": 0.5004450542923582,
|
|
"calibration/coverage@0%": 0.07227632705479452,
|
|
"calibration/coverage@1%": 0.10938570205479452,
|
|
"calibration/coverage@10%": 0.21955647627201563,
|
|
"calibration/coverage@15%": 0.2676087022994129,
|
|
"calibration/coverage@20%": 0.33362891389432486,
|
|
"calibration/coverage@25%": 0.4402993517612524,
|
|
"calibration/coverage@30%": 0.5215891022504893,
|
|
"calibration/coverage@5%": 0.15978091364970645,
|
|
"calibration/ece": 0.16065055711412365,
|
|
"calibration/mean_confidence": 0.48663652354730924,
|
|
"calibration/prompt_uniqueness": 0.880144990569719,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 733.4,
|
|
"completions/max_terminated_length": 535.2,
|
|
"completions/mean_length": 210.1384765625,
|
|
"completions/mean_terminated_length": 210.0097869873047,
|
|
"completions/min_length": 94.4,
|
|
"completions/min_terminated_length": 94.4,
|
|
"epoch": 0.768,
|
|
"grad_norm": 0.0009373911889269948,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0012,
|
|
"num_tokens": 814695245.0,
|
|
"reward": 0.9313406467437744,
|
|
"reward_std": 0.07817895561456681,
|
|
"rewards/accuracy_reward": 0.514453125,
|
|
"rewards/brier_reward": 0.8032928228378295,
|
|
"rewards/confidence_uniqueness_reward": 0.9653749227523803,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002717023529112339,
|
|
"rewards/frontier_coverage_0": 0.1328577607870102,
|
|
"rewards/frontier_coverage_1": 0.1328577607870102,
|
|
"rewards/frontier_coverage_10": 0.13257997035980223,
|
|
"rewards/frontier_coverage_15": 0.13016380369663239,
|
|
"rewards/frontier_coverage_20": 0.09398490190505981,
|
|
"rewards/frontier_coverage_25": 0.057968994975090025,
|
|
"rewards/frontier_coverage_5": 0.1328577607870102,
|
|
"rewards/frontier_ece_reward": 0.003562742657959461,
|
|
"rewards/frontier_entropy_batch_reward": -0.13192067593336104,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08997802734375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.169921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12250371724367141,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044989013671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044989013671875,
|
|
"signal/advantage_abs_mean": 0.0592110738158226,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0592110738158226,
|
|
"signal/advantage_pre_scale_std": 0.10076282024383545,
|
|
"signal/advantage_std": 0.10076282024383545,
|
|
"signal/brier_reward/centered_abs_mean": 0.1197770431637764,
|
|
"signal/brier_reward/group_bin_occupancy": 0.852734375,
|
|
"signal/brier_reward/group_std_mean": 0.15326233208179474,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011977704800665378,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011977704800665378,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012569081410765648,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87578125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016215594485402107,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012569081503897906,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012569081503897906,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002442924352362752,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.715234375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004164928989484906,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.053655600524507e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.053655600524507e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1612669676542282,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.871875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2072072833776474,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020158371888101103,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020158371888101103,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1612669676542282,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.871875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2072072833776474,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020158371888101103,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020158371888101103,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16074672639369963,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.871484375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20652774572372437,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020093340426683426,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020093340426683426,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15720563530921935,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20198263525962828,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001965070399455726,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001965070399455726,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10369997471570969,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.866796875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.13390834033489227,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012962497072294354,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012962497072294354,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06143885999917984,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.928515625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07899406105279923,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007679857430048287,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007679857430048287,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1612669676542282,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.871875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2072072833776474,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020158371888101103,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020158371888101103,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00437048701569438,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.662109375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005594444740563631,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000437048717867583,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000437048717867583,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18786839842796327,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.761328125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2498662382364273,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01878684014081955,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01878684014081955,
|
|
"step": 240
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3254199114145034,
|
|
"calibration/batch_distribution_entropy": 0.9789490119022005,
|
|
"calibration/batch_entropy_100bins": 0.9479515107725032,
|
|
"calibration/batch_entropy_10bins": 0.9789490119022005,
|
|
"calibration/batch_entropy_50bins": 0.9707285129333456,
|
|
"calibration/batch_uniqueness": 0.9646381132895101,
|
|
"calibration/buffer_distribution_entropy": 0.9988574494603905,
|
|
"calibration/buffer_entropy_100bins": 0.9858469419972238,
|
|
"calibration/buffer_entropy_10bins": 0.9988574494603905,
|
|
"calibration/buffer_entropy_50bins": 0.995701699935891,
|
|
"calibration/confidence_entropy": 0.4700687621350797,
|
|
"calibration/coverage@0%": 0.041083026960784315,
|
|
"calibration/coverage@1%": 0.09655177696078432,
|
|
"calibration/coverage@10%": 0.21512867647058825,
|
|
"calibration/coverage@15%": 0.2503216911764706,
|
|
"calibration/coverage@20%": 0.2843673406862745,
|
|
"calibration/coverage@25%": 0.3223023897058824,
|
|
"calibration/coverage@30%": 0.4446936274509804,
|
|
"calibration/coverage@5%": 0.147359068627451,
|
|
"calibration/ece": 0.154079998576708,
|
|
"calibration/mean_confidence": 0.5222582954639936,
|
|
"calibration/prompt_uniqueness": 0.8567335069444445,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1005.2,
|
|
"completions/max_terminated_length": 677.6,
|
|
"completions/mean_length": 206.58896484375,
|
|
"completions/mean_terminated_length": 206.06998596191406,
|
|
"completions/min_length": 96.0,
|
|
"completions/min_terminated_length": 96.0,
|
|
"epoch": 0.784,
|
|
"grad_norm": 0.0008056263905018568,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 831985084.0,
|
|
"reward": 0.9469256639480591,
|
|
"reward_std": 0.0791924849152565,
|
|
"rewards/accuracy_reward": 0.5556640625,
|
|
"rewards/brier_reward": 0.785396134853363,
|
|
"rewards/confidence_uniqueness_reward": 0.9655014038085937,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0030867070890963078,
|
|
"rewards/frontier_coverage_0": 0.08916651010513306,
|
|
"rewards/frontier_coverage_1": 0.08916651010513306,
|
|
"rewards/frontier_coverage_10": 0.08891836106777191,
|
|
"rewards/frontier_coverage_15": 0.08784883618354797,
|
|
"rewards/frontier_coverage_20": 0.0627571128308773,
|
|
"rewards/frontier_coverage_25": 0.04613062590360641,
|
|
"rewards/frontier_coverage_5": 0.08916651010513306,
|
|
"rewards/frontier_ece_reward": 0.0026697968831285836,
|
|
"rewards/frontier_entropy_batch_reward": -0.1289479538798332,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09161376953125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.170703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12455501407384872,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.634375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045806884765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045806884765625,
|
|
"signal/advantage_abs_mean": 0.05967723429203033,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05967723429203033,
|
|
"signal/advantage_pre_scale_std": 0.10045831054449081,
|
|
"signal/advantage_std": 0.10045831054449081,
|
|
"signal/brier_reward/centered_abs_mean": 0.12030695676803589,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85625,
|
|
"signal/brier_reward/group_std_mean": 0.15410683453083038,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01203069593757391,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01203069593757391,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013250499032437801,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.867578125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018012562207877635,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013250499032437801,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013250499032437801,
|
|
"signal/format_reward/centered_abs_mean": 0.000933837890625,
|
|
"signal/format_reward/group_bin_occupancy": 0.1265625,
|
|
"signal/format_reward/group_std_mean": 0.002425827318802476,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028906268067657947,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7140625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004852446913719177,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.613283406593837e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.613283406593837e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16021213233470916,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.864453125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2059779554605484,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020026518031954765,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020026518031954765,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16021213233470916,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.864453125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2059779554605484,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020026518031954765,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020026518031954765,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15919291973114014,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.865234375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20466985404491425,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001989911496639252,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001989911496639252,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15465636551380157,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86015625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19887435138225557,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019332046154886483,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019332046154886483,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09457768499851227,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8609375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12258463650941849,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001182221109047532,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001182221109047532,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05887412428855896,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.93203125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0750406637787819,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007359265931881964,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007359265931881964,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16021213233470916,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.864453125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2059779554605484,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020026518031954765,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020026518031954765,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004312580823898316,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6515625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00546288751065731,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00043125808006152513,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00043125808006152513,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1936631292104721,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73671875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.25820142924785616,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01936631351709366,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01936631351709366,
|
|
"step": 245
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1967928711004556,
|
|
"calibration/batch_distribution_entropy": 0.9742510706268096,
|
|
"calibration/batch_entropy_100bins": 0.9507340151497976,
|
|
"calibration/batch_entropy_10bins": 0.9742510706268096,
|
|
"calibration/batch_entropy_50bins": 0.972856437794905,
|
|
"calibration/batch_uniqueness": 0.9643646240234375,
|
|
"calibration/buffer_distribution_entropy": 0.9987200173841982,
|
|
"calibration/buffer_entropy_100bins": 0.9845965356019161,
|
|
"calibration/buffer_entropy_10bins": 0.9987200173841982,
|
|
"calibration/buffer_entropy_50bins": 0.9954272022225975,
|
|
"calibration/confidence_entropy": 0.4909248938909892,
|
|
"calibration/coverage@0%": 0.046875,
|
|
"calibration/coverage@1%": 0.046875,
|
|
"calibration/coverage@10%": 0.280078125,
|
|
"calibration/coverage@15%": 0.51328125,
|
|
"calibration/coverage@20%": 0.592578125,
|
|
"calibration/coverage@25%": 0.655859375,
|
|
"calibration/coverage@30%": 0.7453125,
|
|
"calibration/coverage@5%": 0.145703125,
|
|
"calibration/ece": 0.11571823238789063,
|
|
"calibration/mean_confidence": 0.4886489551121094,
|
|
"calibration/prompt_uniqueness": 0.86171875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 576.6,
|
|
"completions/max_terminated_length": 576.6,
|
|
"completions/mean_length": 207.0623046875,
|
|
"completions/mean_terminated_length": 207.0623046875,
|
|
"completions/min_length": 96.8,
|
|
"completions/min_terminated_length": 96.8,
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.0010407047811895609,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 849115962.0,
|
|
"reward": 0.9649770140647889,
|
|
"reward_std": 0.07340085953474045,
|
|
"rewards/accuracy_reward": 0.58837890625,
|
|
"rewards/brier_reward": 0.8119970202445984,
|
|
"rewards/confidence_uniqueness_reward": 0.9659217834472656,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.00260907681658864,
|
|
"rewards/frontier_coverage_0": 0.0923803374171257,
|
|
"rewards/frontier_coverage_1": 0.0923803374171257,
|
|
"rewards/frontier_coverage_10": 0.09237077087163925,
|
|
"rewards/frontier_coverage_15": 0.08902214169502258,
|
|
"rewards/frontier_coverage_20": 0.06303619369864463,
|
|
"rewards/frontier_coverage_25": 0.04901153296232223,
|
|
"rewards/frontier_coverage_5": 0.0923803374171257,
|
|
"rewards/frontier_ece_reward": 0.0028331642039120196,
|
|
"rewards/frontier_entropy_batch_reward": -0.14387290179729462,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.084429931640625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.165234375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11211418360471725,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0422149658203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0422149658203125,
|
|
"signal/advantage_abs_mean": 0.05633275434374809,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05633275434374809,
|
|
"signal/advantage_pre_scale_std": 0.09726964086294174,
|
|
"signal/advantage_std": 0.09726964086294174,
|
|
"signal/brier_reward/centered_abs_mean": 0.10657454878091813,
|
|
"signal/brier_reward/group_bin_occupancy": 0.852734375,
|
|
"signal/brier_reward/group_std_mean": 0.13808045983314515,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010657455027103423,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010657455027103423,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012256479263305664,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.877734375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015532337687909603,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012256479589268566,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012256479589268566,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024628740502521396,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.696875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004201717115938664,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.078592744714115e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.078592744714115e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14268072247505187,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1830669164657593,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017835090635344385,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017835090635344385,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14268072247505187,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1830669164657593,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017835090635344385,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017835090635344385,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1418178841471672,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.875390625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18199937641620637,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001772723556496203,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001772723556496203,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1328089103102684,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.871875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17064056396484376,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016601114068180323,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016601114068180323,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08025226593017579,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10454665571451187,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00100315329618752,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00100315329618752,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.051503103226423264,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06675118654966354,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006437887786887586,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006437887786887586,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14268072247505187,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.87421875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1830669164657593,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017835090635344385,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017835090635344385,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004069770174100995,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.637890625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005134090967476368,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004069770220667124,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004069770220667124,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19587229192256927,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74921875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2571956992149353,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01958722956478596,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01958722956478596,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"eval_calibration/aurc": 0.4496095895830745,
|
|
"eval_calibration/batch_distribution_entropy": 0.9144992675252371,
|
|
"eval_calibration/batch_entropy_100bins": 0.7043401650925953,
|
|
"eval_calibration/batch_entropy_10bins": 0.9144992675252371,
|
|
"eval_calibration/batch_entropy_50bins": 0.7778935861386687,
|
|
"eval_calibration/batch_uniqueness": 0.9091796875,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9987979675445213,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9837722377098301,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9987979675445213,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9953801956155197,
|
|
"eval_calibration/confidence_entropy": 0.5099341289029582,
|
|
"eval_calibration/coverage@0%": 0.0546875,
|
|
"eval_calibration/coverage@1%": 0.0546875,
|
|
"eval_calibration/coverage@10%": 0.0546875,
|
|
"eval_calibration/coverage@15%": 0.0546875,
|
|
"eval_calibration/coverage@20%": 0.0625,
|
|
"eval_calibration/coverage@25%": 0.203125,
|
|
"eval_calibration/coverage@30%": 0.296875,
|
|
"eval_calibration/coverage@5%": 0.0546875,
|
|
"eval_calibration/ece": 0.19125000000000003,
|
|
"eval_calibration/mean_confidence": 0.42765624999999996,
|
|
"eval_calibration/prompt_uniqueness": 0.9091796875,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 418.0,
|
|
"eval_completions/max_terminated_length": 418.0,
|
|
"eval_completions/mean_length": 215.94012451171875,
|
|
"eval_completions/mean_terminated_length": 215.94012451171875,
|
|
"eval_completions/min_length": 121.75,
|
|
"eval_completions/min_terminated_length": 121.75,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 849115962.0,
|
|
"eval_reward": 0.8419463336467743,
|
|
"eval_reward_std": 0.23093872889876366,
|
|
"eval_rewards/accuracy_reward": 0.44140625,
|
|
"eval_rewards/brier_reward": 0.797715574502945,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.91015625,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0027635535807348788,
|
|
"eval_rewards/frontier_coverage_0": 0.16548816114664078,
|
|
"eval_rewards/frontier_coverage_1": 0.16548816114664078,
|
|
"eval_rewards/frontier_coverage_10": 0.16445041447877884,
|
|
"eval_rewards/frontier_coverage_15": 0.15276920050382614,
|
|
"eval_rewards/frontier_coverage_20": 0.09404854476451874,
|
|
"eval_rewards/frontier_coverage_25": 0.05035925842821598,
|
|
"eval_rewards/frontier_coverage_5": 0.16548816114664078,
|
|
"eval_rewards/frontier_ece_reward": 0.00329311826499179,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.6181488037109375,
|
|
"eval_runtime": 21.5949,
|
|
"eval_samples_per_second": 23.154,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.479736328125,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4974188432097435,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2398681640625,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2398681640625,
|
|
"eval_signal/advantage_abs_mean": 0.21659140661358833,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21659140661358833,
|
|
"eval_signal/advantage_pre_scale_std": 0.22850319370627403,
|
|
"eval_signal/advantage_std": 0.22850319370627403,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.17879249900579453,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.8828125,
|
|
"eval_signal/brier_reward/group_std_mean": 0.23039625957608223,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017879250459372997,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.017879250459372997,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0334930419921875,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.34375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.03927971515804529,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003349304257426411,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003349304257426411,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003261869656853378,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6484375,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.006061125197447836,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0773371438262984e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0773371438262984e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.3526075705885887,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.434835322201252,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004407594562508166,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004407594562508166,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3526075705885887,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.434835322201252,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004407594562508166,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004407594562508166,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.34960638731718063,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.43143124133348465,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00437007995788008,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00437007995788008,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3217253088951111,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.39926163107156754,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00402156647760421,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00402156647760421,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.1837029866874218,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.23896615207195282,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022962873918004334,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022962873918004334,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.08366492204368114,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.11145731434226036,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010458114848006517,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010458114848006517,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3526075705885887,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.434835322201252,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004407594562508166,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004407594562508166,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.005593743873760104,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.921875,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.007015189039520919,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005593743990175426,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005593743990175426,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31342506408691406,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.33213482052087784,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031342506408691406,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031342506408691406,
|
|
"eval_steps_per_second": 0.185,
|
|
"step": 250
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22865828436295063,
|
|
"calibration/batch_distribution_entropy": 0.9663037413073996,
|
|
"calibration/batch_entropy_100bins": 0.9474363632028673,
|
|
"calibration/batch_entropy_10bins": 0.9663037413073996,
|
|
"calibration/batch_entropy_50bins": 0.9671213935238819,
|
|
"calibration/batch_uniqueness": 0.96414794921875,
|
|
"calibration/buffer_distribution_entropy": 0.9987440315376857,
|
|
"calibration/buffer_entropy_100bins": 0.9831135431606464,
|
|
"calibration/buffer_entropy_10bins": 0.9987440315376857,
|
|
"calibration/buffer_entropy_50bins": 0.9953063778161239,
|
|
"calibration/confidence_entropy": 0.47730441676015234,
|
|
"calibration/coverage@0%": 0.01484375,
|
|
"calibration/coverage@1%": 0.01484375,
|
|
"calibration/coverage@10%": 0.165234375,
|
|
"calibration/coverage@15%": 0.241015625,
|
|
"calibration/coverage@20%": 0.522265625,
|
|
"calibration/coverage@25%": 0.65703125,
|
|
"calibration/coverage@30%": 0.797265625,
|
|
"calibration/coverage@5%": 0.051953125,
|
|
"calibration/ece": 0.14734528249960935,
|
|
"calibration/mean_confidence": 0.5190219050003906,
|
|
"calibration/prompt_uniqueness": 0.86396484375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 461.8,
|
|
"completions/max_terminated_length": 461.8,
|
|
"completions/mean_length": 210.773828125,
|
|
"completions/mean_terminated_length": 210.773828125,
|
|
"completions/min_length": 99.4,
|
|
"completions/min_terminated_length": 99.4,
|
|
"epoch": 0.816,
|
|
"grad_norm": 0.0009977244772017002,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 866373454.0,
|
|
"reward": 0.9607008934020996,
|
|
"reward_std": 0.0780528113245964,
|
|
"rewards/accuracy_reward": 0.58310546875,
|
|
"rewards/brier_reward": 0.7890378952026367,
|
|
"rewards/confidence_uniqueness_reward": 0.9668556213378906,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0027697683311998845,
|
|
"rewards/frontier_coverage_0": 0.06974590048193932,
|
|
"rewards/frontier_coverage_1": 0.06974590048193932,
|
|
"rewards/frontier_coverage_10": 0.06944515407085419,
|
|
"rewards/frontier_coverage_15": 0.0675901010632515,
|
|
"rewards/frontier_coverage_20": 0.05418720170855522,
|
|
"rewards/frontier_coverage_25": 0.0440024096518755,
|
|
"rewards/frontier_coverage_5": 0.0696968175470829,
|
|
"rewards/frontier_ece_reward": 0.0021910452749580147,
|
|
"rewards/frontier_entropy_batch_reward": -0.12180853635072708,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.091473388671875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.169921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12305806428194047,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0457366943359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0457366943359375,
|
|
"signal/advantage_abs_mean": 0.05912850499153137,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05912850499153137,
|
|
"signal/advantage_pre_scale_std": 0.10116375237703323,
|
|
"signal/advantage_std": 0.10116375237703323,
|
|
"signal/brier_reward/centered_abs_mean": 0.11915633082389832,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8625,
|
|
"signal/brier_reward/group_std_mean": 0.15226575136184692,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011915633082389831,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011915633082389831,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01175994873046875,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.014767202734947204,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001175994914956391,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001175994914956391,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026121003553271293,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.709375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004367161309346557,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.265125487814657e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.265125487814657e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1560654640197754,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1990586817264557,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001950818463228643,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001950818463228643,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1560654640197754,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1990586817264557,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001950818463228643,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001950818463228643,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15450561046600342,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19710008800029755,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001931320084258914,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001931320084258914,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14276299774646758,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.863671875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1824551671743393,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017845374997705222,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017845374997705222,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08745990991592408,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.877734375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11309091001749039,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001093248906545341,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001093248906545341,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05504238083958626,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.92265625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07104835510253907,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006880297674797476,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006880297674797476,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15581389665603637,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1987439811229706,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019476738292723895,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019476738292723895,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003923707129433751,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.658203125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00497177829965949,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003923707117792219,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003923707117792219,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18463816046714782,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2472657859325409,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01846381649374962,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01846381649374962,
|
|
"step": 255
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2778293031515605,
|
|
"calibration/batch_distribution_entropy": 0.9723484188459602,
|
|
"calibration/batch_entropy_100bins": 0.9456738301824512,
|
|
"calibration/batch_entropy_10bins": 0.9723484188459602,
|
|
"calibration/batch_entropy_50bins": 0.968407087893359,
|
|
"calibration/batch_uniqueness": 0.962615966796875,
|
|
"calibration/buffer_distribution_entropy": 0.9986561322880023,
|
|
"calibration/buffer_entropy_100bins": 0.9815412495509671,
|
|
"calibration/buffer_entropy_10bins": 0.9986561322880023,
|
|
"calibration/buffer_entropy_50bins": 0.9951468722609143,
|
|
"calibration/confidence_entropy": 0.4893640574141166,
|
|
"calibration/coverage@0%": 0.034375,
|
|
"calibration/coverage@1%": 0.034375,
|
|
"calibration/coverage@10%": 0.24140625,
|
|
"calibration/coverage@15%": 0.296875,
|
|
"calibration/coverage@20%": 0.34765625,
|
|
"calibration/coverage@25%": 0.4203125,
|
|
"calibration/coverage@30%": 0.58828125,
|
|
"calibration/coverage@5%": 0.151953125,
|
|
"calibration/ece": 0.11994438798437498,
|
|
"calibration/mean_confidence": 0.480172799515625,
|
|
"calibration/prompt_uniqueness": 0.86845703125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 692.4,
|
|
"completions/max_terminated_length": 512.6,
|
|
"completions/mean_length": 218.40234375,
|
|
"completions/mean_terminated_length": 218.27389831542968,
|
|
"completions/min_length": 100.2,
|
|
"completions/min_terminated_length": 100.2,
|
|
"epoch": 0.832,
|
|
"grad_norm": 0.0008983217994682491,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 883618246.0,
|
|
"reward": 0.9495777606964111,
|
|
"reward_std": 0.07850262373685837,
|
|
"rewards/accuracy_reward": 0.553125,
|
|
"rewards/brier_reward": 0.8154358863830566,
|
|
"rewards/confidence_uniqueness_reward": 0.9653682827949523,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002407958754338324,
|
|
"rewards/frontier_coverage_0": 0.11853809803724288,
|
|
"rewards/frontier_coverage_1": 0.11853809803724288,
|
|
"rewards/frontier_coverage_10": 0.11834341883659363,
|
|
"rewards/frontier_coverage_15": 0.10681554824113845,
|
|
"rewards/frontier_coverage_20": 0.07330435365438462,
|
|
"rewards/frontier_coverage_25": 0.05658877268433571,
|
|
"rewards/frontier_coverage_5": 0.11847927272319794,
|
|
"rewards/frontier_ece_reward": 0.002967313444241881,
|
|
"rewards/frontier_entropy_batch_reward": -0.14165550619363784,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0945068359375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12645548731088638,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04725341796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04725341796875,
|
|
"signal/advantage_abs_mean": 0.059925134479999545,
|
|
"signal/advantage_pre_scale_abs_mean": 0.059925134479999545,
|
|
"signal/advantage_pre_scale_std": 0.10259477943181991,
|
|
"signal/advantage_std": 0.10259477943181991,
|
|
"signal/brier_reward/centered_abs_mean": 0.10476993620395661,
|
|
"signal/brier_reward/group_bin_occupancy": 0.849609375,
|
|
"signal/brier_reward/group_std_mean": 0.134723761677742,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010476993769407273,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010476993769407273,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012731090188026428,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.86875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016239034570753576,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012731090188026427,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012731090188026427,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002151795034296811,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.723828125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035023723961785437,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6897438874584623e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6897438874584623e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14922354221343995,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.86015625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1917937785387039,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018652942962944508,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018652942962944508,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14922354221343995,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86015625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1917937785387039,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018652942962944508,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018652942962944508,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14830959737300872,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.861328125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1906294882297516,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018538699485361576,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018538699485361576,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13270397633314132,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.862890625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1706369161605835,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016587997553870082,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016587997553870082,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08176114857196808,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88828125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10538419336080551,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010220143478363753,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010220143478363753,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05487861037254334,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.926953125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06957651078701019,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006859826273284853,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006859826273284853,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14910914599895478,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86015625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1916535586118698,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018638643436133862,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018638643436133862,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003827466629445553,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.633203125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004841751419007778,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003827466571237892,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003827466571237892,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19984618723392486,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7828125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2566663324832916,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019984618946909904,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019984618946909904,
|
|
"step": 260
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3167713713435312,
|
|
"calibration/batch_distribution_entropy": 0.9752872878231809,
|
|
"calibration/batch_entropy_100bins": 0.9470149013132019,
|
|
"calibration/batch_entropy_10bins": 0.9752872878231809,
|
|
"calibration/batch_entropy_50bins": 0.9694887062876333,
|
|
"calibration/batch_uniqueness": 0.9655956675797309,
|
|
"calibration/buffer_distribution_entropy": 0.9986996758924558,
|
|
"calibration/buffer_entropy_100bins": 0.9795660954880914,
|
|
"calibration/buffer_entropy_10bins": 0.9986996758924558,
|
|
"calibration/buffer_entropy_50bins": 0.9950240138509777,
|
|
"calibration/confidence_entropy": 0.49941824109751864,
|
|
"calibration/coverage@0%": 0.03948905332681017,
|
|
"calibration/coverage@1%": 0.06258102984344423,
|
|
"calibration/coverage@10%": 0.2123455846379648,
|
|
"calibration/coverage@15%": 0.3135373348825832,
|
|
"calibration/coverage@20%": 0.39717312866927595,
|
|
"calibration/coverage@25%": 0.47103183096868884,
|
|
"calibration/coverage@30%": 0.5230140044031311,
|
|
"calibration/coverage@5%": 0.14667471868884538,
|
|
"calibration/ece": 0.1526993445146855,
|
|
"calibration/mean_confidence": 0.5272793125303242,
|
|
"calibration/prompt_uniqueness": 0.8712022287168313,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 520.2,
|
|
"completions/max_terminated_length": 520.2,
|
|
"completions/mean_length": 221.2171875,
|
|
"completions/mean_terminated_length": 221.2171875,
|
|
"completions/min_length": 103.6,
|
|
"completions/min_terminated_length": 103.6,
|
|
"epoch": 0.848,
|
|
"grad_norm": 0.0008509070612490177,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 900897878.0,
|
|
"reward": 0.9409607172012329,
|
|
"reward_std": 0.0743522085249424,
|
|
"rewards/accuracy_reward": 0.53916015625,
|
|
"rewards/brier_reward": 0.7976258873939515,
|
|
"rewards/confidence_uniqueness_reward": 0.9661872863769532,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002944446820765734,
|
|
"rewards/frontier_coverage_0": 0.10507128238677979,
|
|
"rewards/frontier_coverage_1": 0.10507128238677979,
|
|
"rewards/frontier_coverage_10": 0.10452338606119156,
|
|
"rewards/frontier_coverage_15": 0.09657607525587082,
|
|
"rewards/frontier_coverage_20": 0.05965607911348343,
|
|
"rewards/frontier_coverage_25": 0.045341891795396806,
|
|
"rewards/frontier_coverage_5": 0.1049825593829155,
|
|
"rewards/frontier_ece_reward": 0.0027819779235869644,
|
|
"rewards/frontier_entropy_batch_reward": -0.12909687906503678,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.078118896484375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.166015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10906965583562851,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0390594482421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0390594482421875,
|
|
"signal/advantage_abs_mean": 0.05558159351348877,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05558159351348877,
|
|
"signal/advantage_pre_scale_std": 0.09597710967063904,
|
|
"signal/advantage_std": 0.09597710967063904,
|
|
"signal/brier_reward/centered_abs_mean": 0.11261094510555267,
|
|
"signal/brier_reward/group_bin_occupancy": 0.868359375,
|
|
"signal/brier_reward/group_std_mean": 0.14441443979740143,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0112610949203372,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0112610949203372,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012626891583204269,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.853125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016479195840656758,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012626891722902656,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012626891722902656,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002747892402112484,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.703125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004585493355989456,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.434865539020393e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.434865539020393e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14814209938049316,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.886328125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19057103991508484,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018517762422561646,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018517762422561646,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14814209938049316,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.886328125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19057103991508484,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018517762422561646,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018517762422561646,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14656879603862763,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.88515625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18858122825622559,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018321099691092969,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018321099691092969,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13467406630516052,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.878515625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17328309118747712,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016834259033203125,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016834259033203125,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08421734273433686,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9078125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10839319676160812,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010527168167755007,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010527168167755007,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.054951707273721694,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9265625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07051893323659897,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006868963362649083,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006868963362649083,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1478155016899109,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1901459276676178,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018476937897503377,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018476937897503377,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0038679220713675024,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.658984375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00496214609593153,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00038679222343489527,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00038679222343489527,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18982007205486298,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.794921875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24912202656269072,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01898200698196888,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01898200698196888,
|
|
"step": 265
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2768838924896155,
|
|
"calibration/batch_distribution_entropy": 0.9617687964595516,
|
|
"calibration/batch_entropy_100bins": 0.9343288350446255,
|
|
"calibration/batch_entropy_10bins": 0.9617687964595516,
|
|
"calibration/batch_entropy_50bins": 0.9600482617272046,
|
|
"calibration/batch_uniqueness": 0.966256189324948,
|
|
"calibration/buffer_distribution_entropy": 0.9987696399448172,
|
|
"calibration/buffer_entropy_100bins": 0.9778730418080352,
|
|
"calibration/buffer_entropy_10bins": 0.9987696399448172,
|
|
"calibration/buffer_entropy_50bins": 0.9949262133230533,
|
|
"calibration/confidence_entropy": 0.49433436483942234,
|
|
"calibration/coverage@0%": 0.004296875,
|
|
"calibration/coverage@1%": 0.004296875,
|
|
"calibration/coverage@10%": 0.11484375,
|
|
"calibration/coverage@15%": 0.1546875,
|
|
"calibration/coverage@20%": 0.284375,
|
|
"calibration/coverage@25%": 0.42265625,
|
|
"calibration/coverage@30%": 0.493359375,
|
|
"calibration/coverage@5%": 0.060546875,
|
|
"calibration/ece": 0.13839882712774432,
|
|
"calibration/mean_confidence": 0.600460792490651,
|
|
"calibration/prompt_uniqueness": 0.881765921728668,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 970.0,
|
|
"completions/max_terminated_length": 566.6,
|
|
"completions/mean_length": 223.150390625,
|
|
"completions/mean_terminated_length": 222.89442138671876,
|
|
"completions/min_length": 108.2,
|
|
"completions/min_terminated_length": 108.2,
|
|
"epoch": 0.864,
|
|
"grad_norm": 0.0009654518216848373,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 918169754.0,
|
|
"reward": 0.9622796416282654,
|
|
"reward_std": 0.07733558416366577,
|
|
"rewards/accuracy_reward": 0.5888671875,
|
|
"rewards/brier_reward": 0.7954235196113586,
|
|
"rewards/confidence_uniqueness_reward": 0.9653422832489014,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002754275593906641,
|
|
"rewards/frontier_coverage_0": 0.07536681443452835,
|
|
"rewards/frontier_coverage_1": 0.07536681443452835,
|
|
"rewards/frontier_coverage_10": 0.07511008605360985,
|
|
"rewards/frontier_coverage_15": 0.06949677914381028,
|
|
"rewards/frontier_coverage_20": 0.052758050709962846,
|
|
"rewards/frontier_coverage_25": 0.047069764137268065,
|
|
"rewards/frontier_coverage_5": 0.07536681443452835,
|
|
"rewards/frontier_ece_reward": 0.0024394527310505508,
|
|
"rewards/frontier_entropy_batch_reward": -0.14224078506231308,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08975830078125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.167578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11916272044181823,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044879150390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044879150390625,
|
|
"signal/advantage_abs_mean": 0.059315939992666246,
|
|
"signal/advantage_pre_scale_abs_mean": 0.059315939992666246,
|
|
"signal/advantage_pre_scale_std": 0.10067972093820572,
|
|
"signal/advantage_std": 0.10067972093820572,
|
|
"signal/brier_reward/centered_abs_mean": 0.11611681431531906,
|
|
"signal/brier_reward/group_bin_occupancy": 0.86328125,
|
|
"signal/brier_reward/group_std_mean": 0.1489056169986725,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011611681431531906,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011611681431531906,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013113933056592942,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87734375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017149509117007255,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013113933615386485,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013113933615386485,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027526959776878355,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.719140625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00466503887437284,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.440870204940438e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.440870204940438e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15471426248550416,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.876953125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19702006578445436,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019339283695444464,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019339283695444464,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15471426248550416,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.876953125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19702006578445436,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019339283695444464,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019339283695444464,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15364649891853333,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.876953125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19570617973804474,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001920581259764731,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001920581259764731,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13317597806453704,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.867578125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1696739375591278,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016646997770294546,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016646997770294546,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08484991490840912,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.895703125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10823871493339539,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010606239549815655,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010606239549815655,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05707306563854218,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.928515625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07243188172578811,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007134133367799223,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007134133367799223,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15471426248550416,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.876953125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19702006578445436,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019339283695444464,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019339283695444464,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004025649838149547,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6640625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0050605999305844305,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004025649803224951,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004025649803224951,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1900925815105438,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.792578125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.25062963366508484,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019009258598089218,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019009258598089218,
|
|
"step": 270
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3686495707449447,
|
|
"calibration/batch_distribution_entropy": 0.9786163924820228,
|
|
"calibration/batch_entropy_100bins": 0.9506496876710446,
|
|
"calibration/batch_entropy_10bins": 0.9786163924820228,
|
|
"calibration/batch_entropy_50bins": 0.973175814835131,
|
|
"calibration/batch_uniqueness": 0.964288330078125,
|
|
"calibration/buffer_distribution_entropy": 0.9985241376950669,
|
|
"calibration/buffer_entropy_100bins": 0.9767269257308445,
|
|
"calibration/buffer_entropy_10bins": 0.9985241376950669,
|
|
"calibration/buffer_entropy_50bins": 0.9947127581671238,
|
|
"calibration/confidence_entropy": 0.4652774799868107,
|
|
"calibration/coverage@0%": 0.0109375,
|
|
"calibration/coverage@1%": 0.0109375,
|
|
"calibration/coverage@10%": 0.055078125,
|
|
"calibration/coverage@15%": 0.14765625,
|
|
"calibration/coverage@20%": 0.2,
|
|
"calibration/coverage@25%": 0.250390625,
|
|
"calibration/coverage@30%": 0.327734375,
|
|
"calibration/coverage@5%": 0.0234375,
|
|
"calibration/ece": 0.15863184056992186,
|
|
"calibration/mean_confidence": 0.5096962844300781,
|
|
"calibration/prompt_uniqueness": 0.8615234375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 550.8,
|
|
"completions/max_terminated_length": 550.8,
|
|
"completions/mean_length": 222.6953125,
|
|
"completions/mean_terminated_length": 222.6953125,
|
|
"completions/min_length": 101.8,
|
|
"completions/min_terminated_length": 101.8,
|
|
"epoch": 0.88,
|
|
"grad_norm": 0.0008373066666536033,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 935597226.0,
|
|
"reward": 0.9281533002853394,
|
|
"reward_std": 0.0796884223818779,
|
|
"rewards/accuracy_reward": 0.51435546875,
|
|
"rewards/brier_reward": 0.7964529275894165,
|
|
"rewards/confidence_uniqueness_reward": 0.9651763916015625,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.003106454946100712,
|
|
"rewards/frontier_coverage_0": 0.11970811784267425,
|
|
"rewards/frontier_coverage_1": 0.11970811784267425,
|
|
"rewards/frontier_coverage_10": 0.11946074962615967,
|
|
"rewards/frontier_coverage_15": 0.10414378494024276,
|
|
"rewards/frontier_coverage_20": 0.07021132558584213,
|
|
"rewards/frontier_coverage_25": 0.05124953538179398,
|
|
"rewards/frontier_coverage_5": 0.11970811784267425,
|
|
"rewards/frontier_ece_reward": 0.0030206756200641394,
|
|
"rewards/frontier_entropy_batch_reward": -0.14252967536449432,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.091741943359375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12339542210102081,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0458709716796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0458709716796875,
|
|
"signal/advantage_abs_mean": 0.061288871616125104,
|
|
"signal/advantage_pre_scale_abs_mean": 0.061288871616125104,
|
|
"signal/advantage_pre_scale_std": 0.10269816666841507,
|
|
"signal/advantage_std": 0.10269816666841507,
|
|
"signal/brier_reward/centered_abs_mean": 0.12020632475614548,
|
|
"signal/brier_reward/group_bin_occupancy": 0.840234375,
|
|
"signal/brier_reward/group_std_mean": 0.15456699430942536,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012020632438361645,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012020632438361645,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01308910846710205,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.85390625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016654768399894236,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001308910851366818,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001308910851366818,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031963346991688013,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.694921875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005663991440087557,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9954184467205776e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9954184467205776e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16143686175346375,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.867578125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20570681393146514,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020179608603939415,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020179608603939415,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16143686175346375,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.867578125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20570681393146514,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020179608603939415,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020179608603939415,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16061599552631378,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8671875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2046827495098114,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020077000837773083,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020077000837773083,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1376311719417572,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.85625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17559443712234496,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001720389723777771,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001720389723777771,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08813889771699905,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11294594407081604,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011017362354323267,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011017362354323267,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05825449377298355,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.927734375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07462759166955948,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007281811907887459,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007281811907887459,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16143686175346375,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.867578125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20570681393146514,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020179608603939415,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020179608603939415,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004061997029930353,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.656640625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005123640317469836,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00040619971114210787,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00040619971114210787,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19641498029232024,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7671875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2599118322134018,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01964149847626686,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01964149847626686,
|
|
"step": 275
|
|
},
|
|
{
|
|
"calibration/aurc": 0.35893480202497713,
|
|
"calibration/batch_distribution_entropy": 0.9836316827385708,
|
|
"calibration/batch_entropy_100bins": 0.951102171596286,
|
|
"calibration/batch_entropy_10bins": 0.9836316827385708,
|
|
"calibration/batch_entropy_50bins": 0.9744830634854351,
|
|
"calibration/batch_uniqueness": 0.9661264253561747,
|
|
"calibration/buffer_distribution_entropy": 0.9982682590269872,
|
|
"calibration/buffer_entropy_100bins": 0.9763659335051681,
|
|
"calibration/buffer_entropy_10bins": 0.9982682590269872,
|
|
"calibration/buffer_entropy_50bins": 0.9945770986362333,
|
|
"calibration/confidence_entropy": 0.49407042141491697,
|
|
"calibration/coverage@0%": 0.0125,
|
|
"calibration/coverage@1%": 0.0125,
|
|
"calibration/coverage@10%": 0.0390625,
|
|
"calibration/coverage@15%": 0.044921875,
|
|
"calibration/coverage@20%": 0.16015625,
|
|
"calibration/coverage@25%": 0.3020907228473581,
|
|
"calibration/coverage@30%": 0.47141557607632095,
|
|
"calibration/coverage@5%": 0.018359375,
|
|
"calibration/ece": 0.1521837402607701,
|
|
"calibration/mean_confidence": 0.5152094586979612,
|
|
"calibration/prompt_uniqueness": 0.865876762080515,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 700.0,
|
|
"completions/max_terminated_length": 506.8,
|
|
"completions/mean_length": 226.031640625,
|
|
"completions/mean_terminated_length": 225.90355529785157,
|
|
"completions/min_length": 99.4,
|
|
"completions/min_terminated_length": 99.4,
|
|
"epoch": 0.896,
|
|
"grad_norm": 0.0007808567606844008,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 953022638.0,
|
|
"reward": 0.9413276553153992,
|
|
"reward_std": 0.0753513365983963,
|
|
"rewards/accuracy_reward": 0.54345703125,
|
|
"rewards/brier_reward": 0.792955505847931,
|
|
"rewards/confidence_uniqueness_reward": 0.9654982805252075,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.00300391623750329,
|
|
"rewards/frontier_coverage_0": 0.10249666720628739,
|
|
"rewards/frontier_coverage_1": 0.10249666720628739,
|
|
"rewards/frontier_coverage_10": 0.10201466381549835,
|
|
"rewards/frontier_coverage_15": 0.0916542112827301,
|
|
"rewards/frontier_coverage_20": 0.06445520780980588,
|
|
"rewards/frontier_coverage_25": 0.04657732546329498,
|
|
"rewards/frontier_coverage_5": 0.10260441452264786,
|
|
"rewards/frontier_ece_reward": 0.002176952688023448,
|
|
"rewards/frontier_entropy_batch_reward": -0.1403130456805229,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.084613037109375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.169921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.11815287619829178,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0423065185546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0423065185546875,
|
|
"signal/advantage_abs_mean": 0.056077169626951216,
|
|
"signal/advantage_pre_scale_abs_mean": 0.056077169626951216,
|
|
"signal/advantage_pre_scale_std": 0.09611705392599106,
|
|
"signal/advantage_std": 0.09611705392599106,
|
|
"signal/brier_reward/centered_abs_mean": 0.11416497230529785,
|
|
"signal/brier_reward/group_bin_occupancy": 0.862890625,
|
|
"signal/brier_reward/group_std_mean": 0.1458996891975403,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011416497454047204,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011416497454047204,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01277841292321682,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.863671875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016587360575795174,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012778413249179721,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012778413249179721,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026259610895067453,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004510869830846786,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.282451471022796e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.282451471022796e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15629157423973083,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20013498663902282,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019536447478458287,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019536447478458287,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15629157423973083,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20013498663902282,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019536447478458287,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019536447478458287,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15510292947292328,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19859937131404876,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019387866836041213,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019387866836041213,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13360550701618196,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17097563147544861,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016700688749551774,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016700688749551774,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08658400624990463,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11109910905361176,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010823000688105822,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010823000688105822,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05502760782837868,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9203125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07039647549390793,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006878450978547335,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006878450978547335,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15607451200485228,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19986163973808288,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001950931502506137,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001950931502506137,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003750496730208397,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.648046875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004760450683534145,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00037504968349821866,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00037504968349821866,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1986088812351227,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.754296875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2622865170240402,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01986088827252388,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01986088827252388,
|
|
"step": 280
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34563367565090275,
|
|
"calibration/batch_distribution_entropy": 0.9775858117370444,
|
|
"calibration/batch_entropy_100bins": 0.9498917342401729,
|
|
"calibration/batch_entropy_10bins": 0.9775858117370444,
|
|
"calibration/batch_entropy_50bins": 0.9729999885268757,
|
|
"calibration/batch_uniqueness": 0.96455078125,
|
|
"calibration/buffer_distribution_entropy": 0.998200870549627,
|
|
"calibration/buffer_entropy_100bins": 0.9762282535895963,
|
|
"calibration/buffer_entropy_10bins": 0.998200870549627,
|
|
"calibration/buffer_entropy_50bins": 0.9945279583894413,
|
|
"calibration/confidence_entropy": 0.5020932663493712,
|
|
"calibration/coverage@0%": 0.030859375,
|
|
"calibration/coverage@1%": 0.030859375,
|
|
"calibration/coverage@10%": 0.0953125,
|
|
"calibration/coverage@15%": 0.168359375,
|
|
"calibration/coverage@20%": 0.27109375,
|
|
"calibration/coverage@25%": 0.35703125,
|
|
"calibration/coverage@30%": 0.4875,
|
|
"calibration/coverage@5%": 0.044140625,
|
|
"calibration/ece": 0.143530546875,
|
|
"calibration/mean_confidence": 0.478352265625,
|
|
"calibration/prompt_uniqueness": 0.869384765625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1195.8,
|
|
"completions/max_terminated_length": 617.6,
|
|
"completions/mean_length": 228.848828125,
|
|
"completions/mean_terminated_length": 228.46502075195312,
|
|
"completions/min_length": 102.6,
|
|
"completions/min_terminated_length": 102.6,
|
|
"epoch": 0.912,
|
|
"grad_norm": 0.000804597744718194,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 970417346.0,
|
|
"reward": 0.945005738735199,
|
|
"reward_std": 0.07620135098695754,
|
|
"rewards/accuracy_reward": 0.5462890625,
|
|
"rewards/brier_reward": 0.7971989035606384,
|
|
"rewards/confidence_uniqueness_reward": 0.9653137803077698,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.002510549989528954,
|
|
"rewards/frontier_coverage_0": 0.10179270692169666,
|
|
"rewards/frontier_coverage_1": 0.10179270692169666,
|
|
"rewards/frontier_coverage_10": 0.10152241215109825,
|
|
"rewards/frontier_coverage_15": 0.09234566390514373,
|
|
"rewards/frontier_coverage_20": 0.06595533415675163,
|
|
"rewards/frontier_coverage_25": 0.04856384471058846,
|
|
"rewards/frontier_coverage_5": 0.10167570598423481,
|
|
"rewards/frontier_ece_reward": 0.002250720001757145,
|
|
"rewards/frontier_entropy_batch_reward": -0.12107873558998108,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08663330078125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11930725127458572,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.64375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.043316650390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.043316650390625,
|
|
"signal/advantage_abs_mean": 0.057180730253458024,
|
|
"signal/advantage_pre_scale_abs_mean": 0.057180730253458024,
|
|
"signal/advantage_pre_scale_std": 0.09748922139406205,
|
|
"signal/advantage_std": 0.09748922139406205,
|
|
"signal/brier_reward/centered_abs_mean": 0.12046879529953003,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85625,
|
|
"signal/brier_reward/group_std_mean": 0.15507035553455353,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012046879716217518,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012046879716217518,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013228565640747546,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.86015625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01768874190747738,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001322856592014432,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001322856592014432,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023816948756575584,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71015625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004248477658256889,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9771187109872697e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9771187109872697e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1678558111190796,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.876171875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21537896990776062,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002098197676241398,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002098197676241398,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1678558111190796,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.876171875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21537896990776062,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002098197676241398,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002098197676241398,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16678664982318878,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.875390625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21400391459465026,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002084833150729537,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002084833150729537,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14504911601543427,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18602396845817565,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001813114038668573,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001813114038668573,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0916235864162445,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88828125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11773888915777206,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001145294844172895,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001145294844172895,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.058785200119018555,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.917578125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07563513517379761,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007348150131292642,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007348150131292642,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16751802563667298,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.876171875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21494931280612944,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020939753856509926,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020939753856509926,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0036217204295098783,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.645703125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004650117922574281,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003621720476076007,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003621720476076007,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.18358459174633027,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.751953125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24674877524375916,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.018358458951115608,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.018358458951115608,
|
|
"step": 285
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4062298187879727,
|
|
"calibration/batch_distribution_entropy": 0.9874305729560149,
|
|
"calibration/batch_entropy_100bins": 0.9521924925781645,
|
|
"calibration/batch_entropy_10bins": 0.9874305729560149,
|
|
"calibration/batch_entropy_50bins": 0.9740365739770798,
|
|
"calibration/batch_uniqueness": 0.96695556640625,
|
|
"calibration/buffer_distribution_entropy": 0.9983456480552251,
|
|
"calibration/buffer_entropy_100bins": 0.9763740046184466,
|
|
"calibration/buffer_entropy_10bins": 0.9983456480552251,
|
|
"calibration/buffer_entropy_50bins": 0.994612891791807,
|
|
"calibration/confidence_entropy": 0.5023546384987589,
|
|
"calibration/coverage@0%": 0.005078125,
|
|
"calibration/coverage@1%": 0.005078125,
|
|
"calibration/coverage@10%": 0.010546875,
|
|
"calibration/coverage@15%": 0.0171875,
|
|
"calibration/coverage@20%": 0.034765625,
|
|
"calibration/coverage@25%": 0.05625,
|
|
"calibration/coverage@30%": 0.170703125,
|
|
"calibration/coverage@5%": 0.005078125,
|
|
"calibration/ece": 0.130933474109375,
|
|
"calibration/mean_confidence": 0.5065108178593749,
|
|
"calibration/prompt_uniqueness": 0.874267578125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 502.6,
|
|
"completions/max_terminated_length": 502.6,
|
|
"completions/mean_length": 219.16572265625,
|
|
"completions/mean_terminated_length": 219.16572265625,
|
|
"completions/min_length": 103.0,
|
|
"completions/min_terminated_length": 103.0,
|
|
"epoch": 0.928,
|
|
"grad_norm": 0.000721353106200695,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0004,
|
|
"num_tokens": 987688419.0,
|
|
"reward": 0.932941198348999,
|
|
"reward_std": 0.07610684931278229,
|
|
"rewards/accuracy_reward": 0.52978515625,
|
|
"rewards/brier_reward": 0.7867828845977783,
|
|
"rewards/confidence_uniqueness_reward": 0.9635459899902343,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.003236295934766531,
|
|
"rewards/frontier_coverage_0": 0.11244506537914276,
|
|
"rewards/frontier_coverage_1": 0.11244506537914276,
|
|
"rewards/frontier_coverage_10": 0.11204758733510971,
|
|
"rewards/frontier_coverage_15": 0.098624786734581,
|
|
"rewards/frontier_coverage_20": 0.06998651325702668,
|
|
"rewards/frontier_coverage_25": 0.05211614817380905,
|
|
"rewards/frontier_coverage_5": 0.11240084767341614,
|
|
"rewards/frontier_ece_reward": 0.002500415127724409,
|
|
"rewards/frontier_entropy_batch_reward": -0.1556967318058014,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.088250732421875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.11783604025840759,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0441253662109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0441253662109375,
|
|
"signal/advantage_abs_mean": 0.05913291722536087,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05913291722536087,
|
|
"signal/advantage_pre_scale_std": 0.0993342086672783,
|
|
"signal/advantage_std": 0.0993342086672783,
|
|
"signal/brier_reward/centered_abs_mean": 0.12455331236124038,
|
|
"signal/brier_reward/group_bin_occupancy": 0.838671875,
|
|
"signal/brier_reward/group_std_mean": 0.15881660878658294,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012455331720411777,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012455331720411777,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014005064964294434,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.863671875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01789715252816677,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014005065197125078,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014005065197125078,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003072420973330736,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.69375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005341992899775505,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.8405264785978946e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.8405264785978946e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16175754070281984,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.84609375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20798506438732148,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020219693426042793,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020219693426042793,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16175754070281984,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.84609375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20798506438732148,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020219693426042793,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020219693426042793,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1606542646884918,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.845703125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20660010874271392,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020081782713532448,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020081782713532448,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.138150891661644,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.83984375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.178511181473732,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017268861876800657,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017268861876800657,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08926409333944321,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.863671875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11590456813573838,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011158011853694915,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011158011853694915,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06056609675288201,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.921875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07759960442781448,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000757076172158122,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000757076172158122,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16137201189994813,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.845703125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2075114369392395,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020171501440927387,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020171501440927387,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003959872899577022,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.637109375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0050338350236415865,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003959872992709279,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003959872992709279,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.20363571047782897,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.778125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.26806468367576597,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.020363571867346764,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.020363571867346764,
|
|
"step": 290
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2766497709707625,
|
|
"calibration/batch_distribution_entropy": 0.9876647648242093,
|
|
"calibration/batch_entropy_100bins": 0.9541677353832169,
|
|
"calibration/batch_entropy_10bins": 0.9876647648242093,
|
|
"calibration/batch_entropy_50bins": 0.9780485077011252,
|
|
"calibration/batch_uniqueness": 0.9672607421875,
|
|
"calibration/buffer_distribution_entropy": 0.9984667280203248,
|
|
"calibration/buffer_entropy_100bins": 0.9765595176283114,
|
|
"calibration/buffer_entropy_10bins": 0.9984667280203248,
|
|
"calibration/buffer_entropy_50bins": 0.9946736777858677,
|
|
"calibration/confidence_entropy": 0.5094665348781423,
|
|
"calibration/coverage@0%": 0.04140625,
|
|
"calibration/coverage@1%": 0.04140625,
|
|
"calibration/coverage@10%": 0.1859375,
|
|
"calibration/coverage@15%": 0.258984375,
|
|
"calibration/coverage@20%": 0.34375,
|
|
"calibration/coverage@25%": 0.4078125,
|
|
"calibration/coverage@30%": 0.534765625,
|
|
"calibration/coverage@5%": 0.0640625,
|
|
"calibration/ece": 0.10064012623476563,
|
|
"calibration/mean_confidence": 0.4969223737652344,
|
|
"calibration/prompt_uniqueness": 0.871875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 544.2,
|
|
"completions/max_terminated_length": 544.2,
|
|
"completions/mean_length": 222.33515625,
|
|
"completions/mean_terminated_length": 222.33515625,
|
|
"completions/min_length": 100.0,
|
|
"completions/min_terminated_length": 100.0,
|
|
"epoch": 0.944,
|
|
"grad_norm": 0.0009958718437701464,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0012,
|
|
"num_tokens": 1004940555.0,
|
|
"reward": 0.9349647402763367,
|
|
"reward_std": 0.08481650799512863,
|
|
"rewards/accuracy_reward": 0.52861328125,
|
|
"rewards/brier_reward": 0.7945270657539367,
|
|
"rewards/confidence_uniqueness_reward": 0.9638397216796875,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0029068424366414545,
|
|
"rewards/frontier_coverage_0": 0.11733992844820022,
|
|
"rewards/frontier_coverage_1": 0.11733992844820022,
|
|
"rewards/frontier_coverage_10": 0.11630354076623917,
|
|
"rewards/frontier_coverage_15": 0.10327569544315338,
|
|
"rewards/frontier_coverage_20": 0.07216155454516411,
|
|
"rewards/frontier_coverage_25": 0.0491545557975769,
|
|
"rewards/frontier_coverage_5": 0.11695939749479294,
|
|
"rewards/frontier_ece_reward": 0.0026014718692749738,
|
|
"rewards/frontier_entropy_batch_reward": -0.14059094190597535,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.110321044921875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.14301176518201827,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0551605224609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0551605224609375,
|
|
"signal/advantage_abs_mean": 0.06603206172585488,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06603206172585488,
|
|
"signal/advantage_pre_scale_std": 0.10855630040168762,
|
|
"signal/advantage_std": 0.10855630040168762,
|
|
"signal/brier_reward/centered_abs_mean": 0.11559386998414993,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85625,
|
|
"signal/brier_reward/group_std_mean": 0.14800142645835876,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011559387482702733,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011559387482702733,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014104413986206054,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.852734375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017873943597078324,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014104413567110896,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014104413567110896,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025243773590773342,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.725,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004259524215012789,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.155471749778371e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.155471749778371e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16891084015369415,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2158743679523468,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021113855065777896,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021113855065777896,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16891084015369415,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2158743679523468,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021113855065777896,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021113855065777896,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16640540361404418,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2127244621515274,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002080067666247487,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002080067666247487,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14721881449222565,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18873787820339202,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018402352230623364,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018402352230623364,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09060794413089752,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.878125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11698480546474457,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001132599334232509,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001132599334232509,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05698830112814903,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9109375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07349057048559189,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007123537710867822,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007123537710867822,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1684437781572342,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.870703125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2152959108352661,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021055472549051046,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021055472549051046,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00390915535390377,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6515625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004986092075705528,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00039091553771868347,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00039091553771868347,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19727673530578613,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76484375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.25940607488155365,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019727673567831516,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019727673567831516,
|
|
"step": 295
|
|
},
|
|
{
|
|
"calibration/aurc": 0.32868832388728325,
|
|
"calibration/batch_distribution_entropy": 0.9856009062503024,
|
|
"calibration/batch_entropy_100bins": 0.9540635512806951,
|
|
"calibration/batch_entropy_10bins": 0.9856009062503024,
|
|
"calibration/batch_entropy_50bins": 0.975213897153728,
|
|
"calibration/batch_uniqueness": 0.9663665771484375,
|
|
"calibration/buffer_distribution_entropy": 0.9985059192901078,
|
|
"calibration/buffer_entropy_100bins": 0.9767553596300397,
|
|
"calibration/buffer_entropy_10bins": 0.9985059192901078,
|
|
"calibration/buffer_entropy_50bins": 0.9946862407467618,
|
|
"calibration/confidence_entropy": 0.4862125053483779,
|
|
"calibration/coverage@0%": 0.005859375,
|
|
"calibration/coverage@1%": 0.005859375,
|
|
"calibration/coverage@10%": 0.096875,
|
|
"calibration/coverage@15%": 0.209765625,
|
|
"calibration/coverage@20%": 0.28046875,
|
|
"calibration/coverage@25%": 0.34609375,
|
|
"calibration/coverage@30%": 0.404296875,
|
|
"calibration/coverage@5%": 0.07421875,
|
|
"calibration/ece": 0.14568909689453127,
|
|
"calibration/mean_confidence": 0.5053752603015624,
|
|
"calibration/prompt_uniqueness": 0.86279296875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 696.8,
|
|
"completions/max_terminated_length": 507.8,
|
|
"completions/mean_length": 222.09775390625,
|
|
"completions/mean_terminated_length": 221.96982421875,
|
|
"completions/min_length": 102.6,
|
|
"completions/min_terminated_length": 102.6,
|
|
"epoch": 0.96,
|
|
"grad_norm": 0.000792116392403841,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 1022155156.0,
|
|
"reward": 0.9359057307243347,
|
|
"reward_std": 0.06655998975038528,
|
|
"rewards/accuracy_reward": 0.52568359375,
|
|
"rewards/brier_reward": 0.8039435505867004,
|
|
"rewards/confidence_uniqueness_reward": 0.9652412414550782,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0029766473453491926,
|
|
"rewards/frontier_coverage_0": 0.11361845880746842,
|
|
"rewards/frontier_coverage_1": 0.11361845880746842,
|
|
"rewards/frontier_coverage_10": 0.1131316065788269,
|
|
"rewards/frontier_coverage_15": 0.10452397763729096,
|
|
"rewards/frontier_coverage_20": 0.0738675132393837,
|
|
"rewards/frontier_coverage_25": 0.05374932512640953,
|
|
"rewards/frontier_coverage_5": 0.11348761469125748,
|
|
"rewards/frontier_ece_reward": 0.0027279237285256384,
|
|
"rewards/frontier_entropy_batch_reward": -0.12616288512945176,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.070672607421875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.09988184720277786,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0353363037109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0353363037109375,
|
|
"signal/advantage_abs_mean": 0.0491713747382164,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0491713747382164,
|
|
"signal/advantage_pre_scale_std": 0.08726191073656082,
|
|
"signal/advantage_std": 0.08726191073656082,
|
|
"signal/brier_reward/centered_abs_mean": 0.10785721391439437,
|
|
"signal/brier_reward/group_bin_occupancy": 0.834765625,
|
|
"signal/brier_reward/group_std_mean": 0.140894290804863,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01078572142869234,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01078572142869234,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013079667091369629,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.851171875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01671627685427666,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013079666998237372,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013079666998237372,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002698933309875429,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.691015625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0046086800284683704,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3736664772732186e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3736664772732186e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14771159887313842,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.86015625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18986626863479614,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001846395037136972,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001846395037136972,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14771159887313842,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86015625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18986626863479614,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001846395037136972,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001846395037136972,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14660803675651551,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.85859375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18843259811401367,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018326004967093468,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018326004967093468,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13180427700281144,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.853515625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1698448807001114,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016475534765049815,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016475534765049815,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08136253356933594,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.886328125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10504998713731765,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010170316556468606,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010170316556468606,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.056659433990716934,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.93359375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07215163707733155,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000708242948167026,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000708242948167026,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14725472331047057,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.859375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18927786350250245,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018406840972602367,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018406840972602367,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003890192415565252,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.630859375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004982131253927946,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00038901924854144453,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00038901924854144453,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1808852344751358,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.755859375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.24199664890766143,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01808852329850197,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01808852329850197,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"eval_calibration/aurc": 0.4616949388658732,
|
|
"eval_calibration/batch_distribution_entropy": 0.927457138975722,
|
|
"eval_calibration/batch_entropy_100bins": 0.7084673039767129,
|
|
"eval_calibration/batch_entropy_10bins": 0.927457138975722,
|
|
"eval_calibration/batch_entropy_50bins": 0.7931473827535757,
|
|
"eval_calibration/batch_uniqueness": 0.9091796875,
|
|
"eval_calibration/buffer_distribution_entropy": 0.99835496744854,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9767894525028088,
|
|
"eval_calibration/buffer_entropy_10bins": 0.99835496744854,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9945313200798988,
|
|
"eval_calibration/confidence_entropy": 0.49295569737742106,
|
|
"eval_calibration/coverage@0%": 0.0546875,
|
|
"eval_calibration/coverage@1%": 0.0546875,
|
|
"eval_calibration/coverage@10%": 0.0546875,
|
|
"eval_calibration/coverage@15%": 0.0546875,
|
|
"eval_calibration/coverage@20%": 0.0625,
|
|
"eval_calibration/coverage@25%": 0.15625,
|
|
"eval_calibration/coverage@30%": 0.3125,
|
|
"eval_calibration/coverage@5%": 0.0546875,
|
|
"eval_calibration/ece": 0.19367187500000002,
|
|
"eval_calibration/mean_confidence": 0.4317968750000001,
|
|
"eval_calibration/prompt_uniqueness": 0.9091796875,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 380.5,
|
|
"eval_completions/max_terminated_length": 380.5,
|
|
"eval_completions/mean_length": 222.7589569091797,
|
|
"eval_completions/mean_terminated_length": 222.7589569091797,
|
|
"eval_completions/min_length": 124.0,
|
|
"eval_completions/min_terminated_length": 124.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 1022155156.0,
|
|
"eval_reward": 0.8408344089984894,
|
|
"eval_reward_std": 0.23095110431313515,
|
|
"eval_rewards/accuracy_reward": 0.431640625,
|
|
"eval_rewards/brier_reward": 0.8074806481599808,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.9091796875,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0033944263705052435,
|
|
"eval_rewards/frontier_coverage_0": 0.1909365188330412,
|
|
"eval_rewards/frontier_coverage_1": 0.1909365188330412,
|
|
"eval_rewards/frontier_coverage_10": 0.18911465257406235,
|
|
"eval_rewards/frontier_coverage_15": 0.1690607387572527,
|
|
"eval_rewards/frontier_coverage_20": 0.10447167791426182,
|
|
"eval_rewards/frontier_coverage_25": 0.05829550698399544,
|
|
"eval_rewards/frontier_coverage_5": 0.1909365188330412,
|
|
"eval_rewards/frontier_ece_reward": 0.003318383765872568,
|
|
"eval_rewards/frontier_entropy_batch_reward": -0.6061325073242188,
|
|
"eval_runtime": 20.378,
|
|
"eval_samples_per_second": 24.536,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4720458984375,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.493147149682045,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23602294921875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23602294921875,
|
|
"eval_signal/advantage_abs_mean": 0.21535654366016388,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21535654366016388,
|
|
"eval_signal/advantage_pre_scale_std": 0.2285638451576233,
|
|
"eval_signal/advantage_std": 0.2285638451576233,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.17071156948804855,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.9140625,
|
|
"eval_signal/brier_reward/group_std_mean": 0.21894007921218872,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017071157693862915,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.017071157693862915,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0352630615234375,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3359375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04285713844001293,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035263061290606856,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035263061290606856,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0043263022089377046,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6640625,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008629275194834918,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.4078777338872897e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.4078777338872897e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.3566969484090805,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.42929134517908096,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004458711948245764,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004458711948245764,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3566969484090805,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.42929134517908096,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004458711948245764,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004458711948245764,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.352319672703743,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4243213012814522,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004403996164910495,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004403996164910495,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3145061433315277,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.38166315108537674,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003931326966267079,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003931326966267079,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.17512128874659538,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.90625,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.22007206827402115,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002189016144257039,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002189016144257039,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0862487182021141,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9296875,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.10976832546293736,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010781089658848941,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010781089658848941,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3566969484090805,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.42929134517908096,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004458711948245764,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004458711948245764,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.005990799865685403,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8984375,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.007247602799907327,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005990800127619877,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005990800127619877,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32252073287963867,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.33716534078121185,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03225207328796387,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03225207328796387,
|
|
"eval_steps_per_second": 0.196,
|
|
"step": 300
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24985993469125098,
|
|
"calibration/batch_distribution_entropy": 0.9709909089719659,
|
|
"calibration/batch_entropy_100bins": 0.9447107719359022,
|
|
"calibration/batch_entropy_10bins": 0.9709909089719659,
|
|
"calibration/batch_entropy_50bins": 0.9677236983664186,
|
|
"calibration/batch_uniqueness": 0.9638519287109375,
|
|
"calibration/buffer_distribution_entropy": 0.9983593120363045,
|
|
"calibration/buffer_entropy_100bins": 0.9769284851620673,
|
|
"calibration/buffer_entropy_10bins": 0.9983593120363045,
|
|
"calibration/buffer_entropy_50bins": 0.9945877722075391,
|
|
"calibration/confidence_entropy": 0.5034992213269515,
|
|
"calibration/coverage@0%": 0.03984375,
|
|
"calibration/coverage@1%": 0.03984375,
|
|
"calibration/coverage@10%": 0.28359375,
|
|
"calibration/coverage@15%": 0.384765625,
|
|
"calibration/coverage@20%": 0.46328125,
|
|
"calibration/coverage@25%": 0.54296875,
|
|
"calibration/coverage@30%": 0.58984375,
|
|
"calibration/coverage@5%": 0.084765625,
|
|
"calibration/ece": 0.14316060628906252,
|
|
"calibration/mean_confidence": 0.48792689371093745,
|
|
"calibration/prompt_uniqueness": 0.865625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 720.2,
|
|
"completions/max_terminated_length": 522.8,
|
|
"completions/mean_length": 223.82119140625,
|
|
"completions/mean_terminated_length": 223.6925476074219,
|
|
"completions/min_length": 104.6,
|
|
"completions/min_terminated_length": 104.6,
|
|
"epoch": 0.976,
|
|
"grad_norm": 0.0008361483342014253,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 1039308205.0,
|
|
"reward": 0.9485756874084472,
|
|
"reward_std": 0.07829709947109223,
|
|
"rewards/accuracy_reward": 0.5544921875,
|
|
"rewards/brier_reward": 0.7974941968917847,
|
|
"rewards/confidence_uniqueness_reward": 0.9653183341026306,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002789009315893054,
|
|
"rewards/frontier_coverage_0": 0.10211436282843352,
|
|
"rewards/frontier_coverage_1": 0.10211436282843352,
|
|
"rewards/frontier_coverage_10": 0.10185855161398649,
|
|
"rewards/frontier_coverage_15": 0.09434006288647652,
|
|
"rewards/frontier_coverage_20": 0.06686797887086868,
|
|
"rewards/frontier_coverage_25": 0.04957782253623009,
|
|
"rewards/frontier_coverage_5": 0.10211436282843352,
|
|
"rewards/frontier_ece_reward": 0.002479456667788327,
|
|
"rewards/frontier_entropy_batch_reward": -0.12853255420923232,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09609375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.13040682971477507,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.048046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.048046875,
|
|
"signal/advantage_abs_mean": 0.058770731836557386,
|
|
"signal/advantage_pre_scale_abs_mean": 0.058770731836557386,
|
|
"signal/advantage_pre_scale_std": 0.10008790940046311,
|
|
"signal/advantage_std": 0.10008790940046311,
|
|
"signal/brier_reward/centered_abs_mean": 0.11222728043794632,
|
|
"signal/brier_reward/group_bin_occupancy": 0.848828125,
|
|
"signal/brier_reward/group_std_mean": 0.14573695361614228,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011222727596759796,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011222727596759796,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01253490149974823,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.882421875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015887865237891673,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001253490149974823,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001253490149974823,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024836147669702767,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00410917429253459,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1045184732647614e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1045184732647614e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1621830016374588,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20892693996429443,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002027287520468235,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002027287520468235,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1621830016374588,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20892693996429443,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002027287520468235,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002027287520468235,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16161874830722808,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2081727385520935,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020202343817800283,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020202343817800283,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14477128386497498,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.862109375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18677389919757842,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001809641090221703,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001809641090221703,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08578696697950364,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11098419278860092,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010723370942287147,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010723370942287147,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0560153141617775,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.934375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07195385619997978,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007001914316788315,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007001914316788315,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1621830016374588,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20892693996429443,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002027287520468235,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002027287520468235,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0039973936509341,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.63984375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.005053135752677918,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003997393825557083,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003997393825557083,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1855588138103485,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.753125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2466350704431534,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01855588089674711,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.01855588089674711,
|
|
"step": 305
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3487669014807945,
|
|
"calibration/batch_distribution_entropy": 0.9754493435740519,
|
|
"calibration/batch_entropy_100bins": 0.952291511351636,
|
|
"calibration/batch_entropy_10bins": 0.9754493435740519,
|
|
"calibration/batch_entropy_50bins": 0.9740999166880734,
|
|
"calibration/batch_uniqueness": 0.9618896484375,
|
|
"calibration/buffer_distribution_entropy": 0.9983498943855491,
|
|
"calibration/buffer_entropy_100bins": 0.9769291495405834,
|
|
"calibration/buffer_entropy_10bins": 0.9983498943855491,
|
|
"calibration/buffer_entropy_50bins": 0.9945590427113518,
|
|
"calibration/confidence_entropy": 0.48104339531250256,
|
|
"calibration/coverage@0%": 0.015625,
|
|
"calibration/coverage@1%": 0.015625,
|
|
"calibration/coverage@10%": 0.083203125,
|
|
"calibration/coverage@15%": 0.112890625,
|
|
"calibration/coverage@20%": 0.18828125,
|
|
"calibration/coverage@25%": 0.253125,
|
|
"calibration/coverage@30%": 0.4546875,
|
|
"calibration/coverage@5%": 0.031640625,
|
|
"calibration/ece": 0.13703221653203126,
|
|
"calibration/mean_confidence": 0.4620144665812499,
|
|
"calibration/prompt_uniqueness": 0.856494140625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 697.8,
|
|
"completions/max_terminated_length": 486.6,
|
|
"completions/mean_length": 226.31220703125,
|
|
"completions/mean_terminated_length": 226.1843505859375,
|
|
"completions/min_length": 101.6,
|
|
"completions/min_terminated_length": 101.6,
|
|
"epoch": 0.992,
|
|
"grad_norm": 0.001037325244396925,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 1056754122.0,
|
|
"reward": 0.9321189403533936,
|
|
"reward_std": 0.07475720196962357,
|
|
"rewards/accuracy_reward": 0.5267578125,
|
|
"rewards/brier_reward": 0.7943168759346009,
|
|
"rewards/confidence_uniqueness_reward": 0.9623941659927369,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002925048628821969,
|
|
"rewards/frontier_coverage_0": 0.11955789923667907,
|
|
"rewards/frontier_coverage_1": 0.11955789923667907,
|
|
"rewards/frontier_coverage_10": 0.11875800639390946,
|
|
"rewards/frontier_coverage_15": 0.1080111250281334,
|
|
"rewards/frontier_coverage_20": 0.07299467772245408,
|
|
"rewards/frontier_coverage_25": 0.053522860258817674,
|
|
"rewards/frontier_coverage_5": 0.11955789923667907,
|
|
"rewards/frontier_ece_reward": 0.0025301900692284106,
|
|
"rewards/frontier_entropy_batch_reward": -0.1599818915128708,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0859619140625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.169140625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11736536026000977,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04298095703125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04298095703125,
|
|
"signal/advantage_abs_mean": 0.05674861744046211,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05674861744046211,
|
|
"signal/advantage_pre_scale_std": 0.09784113019704818,
|
|
"signal/advantage_std": 0.09784113019704818,
|
|
"signal/brier_reward/centered_abs_mean": 0.1121656432747841,
|
|
"signal/brier_reward/group_bin_occupancy": 0.848046875,
|
|
"signal/brier_reward/group_std_mean": 0.14315251410007476,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011216564849019051,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011216564849019051,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01467701867222786,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.837890625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.019009753316640853,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014677019091323019,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014677019091323019,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002658972842618823,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.715625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004619904328137636,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.323716045997571e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.323716045997571e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15493883788585663,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19741571545600892,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019367355620488525,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019367355620488525,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15493883788585663,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19741571545600892,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019367355620488525,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019367355620488525,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15374906808137895,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19593823254108428,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019218633184209465,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019218633184209465,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13852892816066742,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1765392690896988,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017316116951406001,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017316116951406001,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07982205301523208,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.889453125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10253897607326508,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009977756650187074,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009977756650187074,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05590454265475273,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.92109375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07175193578004838,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006988067994825542,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006988067994825542,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15493883788585663,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19741571545600892,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019367355620488525,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019367355620488525,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0038536326494067906,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.637109375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0048526331782341005,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00038536327192559836,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00038536327192559836,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.20577935874462128,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.742578125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.26543656289577483,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.020577935874462126,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.020577935874462126,
|
|
"step": 310
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28304180812555446,
|
|
"calibration/batch_distribution_entropy": 0.9570053825772713,
|
|
"calibration/batch_entropy_100bins": 0.9338788681757995,
|
|
"calibration/batch_entropy_10bins": 0.9570053825772713,
|
|
"calibration/batch_entropy_50bins": 0.9562927539554573,
|
|
"calibration/batch_uniqueness": 0.9649124145507812,
|
|
"calibration/buffer_distribution_entropy": 0.9983675863194514,
|
|
"calibration/buffer_entropy_100bins": 0.9769959361817846,
|
|
"calibration/buffer_entropy_10bins": 0.9983675863194514,
|
|
"calibration/buffer_entropy_50bins": 0.9945730040176226,
|
|
"calibration/confidence_entropy": 0.48210036103995263,
|
|
"calibration/coverage@0%": 0.0078125,
|
|
"calibration/coverage@1%": 0.0078125,
|
|
"calibration/coverage@10%": 0.0419921875,
|
|
"calibration/coverage@15%": 0.083984375,
|
|
"calibration/coverage@20%": 0.1103515625,
|
|
"calibration/coverage@25%": 0.552734375,
|
|
"calibration/coverage@30%": 0.6474609375,
|
|
"calibration/coverage@5%": 0.0078125,
|
|
"calibration/ece": 0.17189453124999998,
|
|
"calibration/mean_confidence": 0.5929296875000001,
|
|
"calibration/prompt_uniqueness": 0.853271484375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 517.5,
|
|
"completions/max_terminated_length": 517.5,
|
|
"completions/mean_length": 223.3858871459961,
|
|
"completions/mean_terminated_length": 223.3858871459961,
|
|
"completions/min_length": 98.5,
|
|
"completions/min_terminated_length": 98.5,
|
|
"epoch": 0.9984,
|
|
"num_tokens": 1063675115.0,
|
|
"reward": 0.9411610662937164,
|
|
"reward_std": 0.07683784514665604,
|
|
"rewards/accuracy_reward": 0.554443359375,
|
|
"rewards/brier_reward": 0.7697184383869171,
|
|
"rewards/confidence_uniqueness_reward": 0.9654731750488281,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.003481648047454655,
|
|
"rewards/frontier_coverage_0": 0.06845960207283497,
|
|
"rewards/frontier_coverage_1": 0.06845960207283497,
|
|
"rewards/frontier_coverage_10": 0.06917408481240273,
|
|
"rewards/frontier_coverage_15": 0.059449709951877594,
|
|
"rewards/frontier_coverage_20": 0.04144248925149441,
|
|
"rewards/frontier_coverage_25": 0.03584153205156326,
|
|
"rewards/frontier_coverage_5": 0.06845960207283497,
|
|
"rewards/frontier_ece_reward": 0.0017672271933406591,
|
|
"rewards/frontier_entropy_batch_reward": -0.14854049682617188,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0879669189453125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1669921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.11592860147356987,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6640625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04398345947265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04398345947265625,
|
|
"signal/advantage_abs_mean": 0.060594651848077774,
|
|
"signal/advantage_pre_scale_abs_mean": 0.060594651848077774,
|
|
"signal/advantage_pre_scale_std": 0.10347720980644226,
|
|
"signal/advantage_std": 0.10347720980644226,
|
|
"signal/brier_reward/centered_abs_mean": 0.11784609407186508,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8671875,
|
|
"signal/brier_reward/group_std_mean": 0.15063194930553436,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011784609407186508,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011784609407186508,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013161659240722656,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.849609375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016746241133660078,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013161659589968622,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013161659589968622,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032848347909748554,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.69921875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005517321405932307,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.106043343199417e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.106043343199417e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14311717450618744,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.876953125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18515148013830185,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017889646114781499,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017889646114781499,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14311717450618744,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.876953125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18515148013830185,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017889646114781499,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017889646114781499,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14185547828674316,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.876953125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18353784829378128,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017731935367919505,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017731935367919505,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1278558410704136,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16538064926862717,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001598198083229363,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001598198083229363,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.070084098726511,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.884765625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09156738221645355,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008760512573644519,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008760512573644519,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.051280662417411804,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.908203125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06649521738290787,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006410082860384136,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006410082860384136,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14311717450618744,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.876953125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18515148013830185,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017889646114781499,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017889646114781499,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004005152499303222,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.6640625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00511455861851573,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004005152586614713,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004005152586614713,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.19959602504968643,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.763671875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2626366764307022,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019959602504968643,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.019959602504968643,
|
|
"step": 312,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.0045177273673172565,
|
|
"train_runtime": 60351.758,
|
|
"train_samples_per_second": 0.331,
|
|
"train_steps_per_second": 0.005
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 312,
|
|
"num_input_tokens_seen": 1063675115,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|