2838 lines
78 KiB
JSON
2838 lines
78 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 7.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 1274,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.027573529411764705,
|
||
|
|
"grad_norm": 12.922369370251287,
|
||
|
|
"learning_rate": 1.25e-06,
|
||
|
|
"loss": 1.0219,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.35352784395217896,
|
||
|
|
"step": 5,
|
||
|
|
"valid_targets_mean": 10593.7,
|
||
|
|
"valid_targets_min": 1314
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05514705882352941,
|
||
|
|
"grad_norm": 9.914692363999333,
|
||
|
|
"learning_rate": 2.8125e-06,
|
||
|
|
"loss": 1.0055,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.33466315269470215,
|
||
|
|
"step": 10,
|
||
|
|
"valid_targets_mean": 9719.9,
|
||
|
|
"valid_targets_min": 4041
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08272058823529412,
|
||
|
|
"grad_norm": 5.140457433964579,
|
||
|
|
"learning_rate": 4.3750000000000005e-06,
|
||
|
|
"loss": 0.9532,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.31088295578956604,
|
||
|
|
"step": 15,
|
||
|
|
"valid_targets_mean": 9489.2,
|
||
|
|
"valid_targets_min": 3677
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11029411764705882,
|
||
|
|
"grad_norm": 2.3405453339381914,
|
||
|
|
"learning_rate": 5.9375e-06,
|
||
|
|
"loss": 0.8851,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.28772222995758057,
|
||
|
|
"step": 20,
|
||
|
|
"valid_targets_mean": 10487.8,
|
||
|
|
"valid_targets_min": 1794
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13786764705882354,
|
||
|
|
"grad_norm": 1.6997672536663537,
|
||
|
|
"learning_rate": 7.500000000000001e-06,
|
||
|
|
"loss": 0.8505,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2901165187358856,
|
||
|
|
"step": 25,
|
||
|
|
"valid_targets_mean": 10282.9,
|
||
|
|
"valid_targets_min": 1881
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16544117647058823,
|
||
|
|
"grad_norm": 1.3985790541526377,
|
||
|
|
"learning_rate": 9.0625e-06,
|
||
|
|
"loss": 0.8194,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2608858346939087,
|
||
|
|
"step": 30,
|
||
|
|
"valid_targets_mean": 9144.1,
|
||
|
|
"valid_targets_min": 2515
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19301470588235295,
|
||
|
|
"grad_norm": 0.8623722371179741,
|
||
|
|
"learning_rate": 1.0625e-05,
|
||
|
|
"loss": 0.79,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2887074649333954,
|
||
|
|
"step": 35,
|
||
|
|
"valid_targets_mean": 10813.7,
|
||
|
|
"valid_targets_min": 3092
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22058823529411764,
|
||
|
|
"grad_norm": 0.8093983860510723,
|
||
|
|
"learning_rate": 1.2187500000000001e-05,
|
||
|
|
"loss": 0.7763,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.26193925738334656,
|
||
|
|
"step": 40,
|
||
|
|
"valid_targets_mean": 10127.0,
|
||
|
|
"valid_targets_min": 2496
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24816176470588236,
|
||
|
|
"grad_norm": 0.5974300980069079,
|
||
|
|
"learning_rate": 1.375e-05,
|
||
|
|
"loss": 0.7367,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2483624666929245,
|
||
|
|
"step": 45,
|
||
|
|
"valid_targets_mean": 10489.3,
|
||
|
|
"valid_targets_min": 3368
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2757352941176471,
|
||
|
|
"grad_norm": 0.46168263473020144,
|
||
|
|
"learning_rate": 1.5312500000000003e-05,
|
||
|
|
"loss": 0.7257,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2395600974559784,
|
||
|
|
"step": 50,
|
||
|
|
"valid_targets_mean": 10156.6,
|
||
|
|
"valid_targets_min": 1452
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30330882352941174,
|
||
|
|
"grad_norm": 0.4028635985676079,
|
||
|
|
"learning_rate": 1.6875e-05,
|
||
|
|
"loss": 0.7035,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20824620127677917,
|
||
|
|
"step": 55,
|
||
|
|
"valid_targets_mean": 8890.8,
|
||
|
|
"valid_targets_min": 1913
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33088235294117646,
|
||
|
|
"grad_norm": 0.3895890718140441,
|
||
|
|
"learning_rate": 1.84375e-05,
|
||
|
|
"loss": 0.6873,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22504115104675293,
|
||
|
|
"step": 60,
|
||
|
|
"valid_targets_mean": 9894.5,
|
||
|
|
"valid_targets_min": 3118
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3584558823529412,
|
||
|
|
"grad_norm": 0.30444276365102046,
|
||
|
|
"learning_rate": 2e-05,
|
||
|
|
"loss": 0.6644,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20244480669498444,
|
||
|
|
"step": 65,
|
||
|
|
"valid_targets_mean": 9749.3,
|
||
|
|
"valid_targets_min": 1933
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3860294117647059,
|
||
|
|
"grad_norm": 0.2581486013897639,
|
||
|
|
"learning_rate": 2.1562500000000002e-05,
|
||
|
|
"loss": 0.6489,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2169645130634308,
|
||
|
|
"step": 70,
|
||
|
|
"valid_targets_mean": 10628.2,
|
||
|
|
"valid_targets_min": 1803
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41360294117647056,
|
||
|
|
"grad_norm": 0.2631860893496907,
|
||
|
|
"learning_rate": 2.3125000000000003e-05,
|
||
|
|
"loss": 0.6346,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.22958728671073914,
|
||
|
|
"step": 75,
|
||
|
|
"valid_targets_mean": 10800.9,
|
||
|
|
"valid_targets_min": 2064
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4411764705882353,
|
||
|
|
"grad_norm": 0.274808899082167,
|
||
|
|
"learning_rate": 2.46875e-05,
|
||
|
|
"loss": 0.6256,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.23121704161167145,
|
||
|
|
"step": 80,
|
||
|
|
"valid_targets_mean": 11487.6,
|
||
|
|
"valid_targets_min": 4161
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46875,
|
||
|
|
"grad_norm": 0.2583078645972247,
|
||
|
|
"learning_rate": 2.625e-05,
|
||
|
|
"loss": 0.612,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1923944652080536,
|
||
|
|
"step": 85,
|
||
|
|
"valid_targets_mean": 9252.9,
|
||
|
|
"valid_targets_min": 226
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4963235294117647,
|
||
|
|
"grad_norm": 0.24880268689380583,
|
||
|
|
"learning_rate": 2.7812500000000002e-05,
|
||
|
|
"loss": 0.607,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19335561990737915,
|
||
|
|
"step": 90,
|
||
|
|
"valid_targets_mean": 9072.7,
|
||
|
|
"valid_targets_min": 1797
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5238970588235294,
|
||
|
|
"grad_norm": 0.2616086521839269,
|
||
|
|
"learning_rate": 2.9375000000000003e-05,
|
||
|
|
"loss": 0.5981,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19159752130508423,
|
||
|
|
"step": 95,
|
||
|
|
"valid_targets_mean": 9744.3,
|
||
|
|
"valid_targets_min": 1782
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5514705882352942,
|
||
|
|
"grad_norm": 0.27692625930198184,
|
||
|
|
"learning_rate": 3.09375e-05,
|
||
|
|
"loss": 0.5915,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18734252452850342,
|
||
|
|
"step": 100,
|
||
|
|
"valid_targets_mean": 8845.1,
|
||
|
|
"valid_targets_min": 2130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5790441176470589,
|
||
|
|
"grad_norm": 0.2574148775678182,
|
||
|
|
"learning_rate": 3.2500000000000004e-05,
|
||
|
|
"loss": 0.5845,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16766202449798584,
|
||
|
|
"step": 105,
|
||
|
|
"valid_targets_mean": 8674.7,
|
||
|
|
"valid_targets_min": 2213
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6066176470588235,
|
||
|
|
"grad_norm": 0.27869577303780235,
|
||
|
|
"learning_rate": 3.40625e-05,
|
||
|
|
"loss": 0.5806,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19295048713684082,
|
||
|
|
"step": 110,
|
||
|
|
"valid_targets_mean": 10376.7,
|
||
|
|
"valid_targets_min": 1765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6341911764705882,
|
||
|
|
"grad_norm": 0.2984993331075873,
|
||
|
|
"learning_rate": 3.5625000000000005e-05,
|
||
|
|
"loss": 0.5777,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20063826441764832,
|
||
|
|
"step": 115,
|
||
|
|
"valid_targets_mean": 9936.5,
|
||
|
|
"valid_targets_min": 1494
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6617647058823529,
|
||
|
|
"grad_norm": 0.271090279172367,
|
||
|
|
"learning_rate": 3.71875e-05,
|
||
|
|
"loss": 0.5672,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18824777007102966,
|
||
|
|
"step": 120,
|
||
|
|
"valid_targets_mean": 9430.4,
|
||
|
|
"valid_targets_min": 1915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6893382352941176,
|
||
|
|
"grad_norm": 0.29538515374006735,
|
||
|
|
"learning_rate": 3.875e-05,
|
||
|
|
"loss": 0.5646,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1923852413892746,
|
||
|
|
"step": 125,
|
||
|
|
"valid_targets_mean": 9131.7,
|
||
|
|
"valid_targets_min": 1686
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7169117647058824,
|
||
|
|
"grad_norm": 0.3122370627704027,
|
||
|
|
"learning_rate": 3.999992484978314e-05,
|
||
|
|
"loss": 0.5644,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.20504149794578552,
|
||
|
|
"step": 130,
|
||
|
|
"valid_targets_mean": 10173.0,
|
||
|
|
"valid_targets_min": 1298
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7444852941176471,
|
||
|
|
"grad_norm": 0.34703468911750324,
|
||
|
|
"learning_rate": 3.999729465149199e-05,
|
||
|
|
"loss": 0.557,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17850443720817566,
|
||
|
|
"step": 135,
|
||
|
|
"valid_targets_mean": 9228.8,
|
||
|
|
"valid_targets_min": 924
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7720588235294118,
|
||
|
|
"grad_norm": 0.332190362036781,
|
||
|
|
"learning_rate": 3.9990907507094396e-05,
|
||
|
|
"loss": 0.5497,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16969668865203857,
|
||
|
|
"step": 140,
|
||
|
|
"valid_targets_mean": 8790.3,
|
||
|
|
"valid_targets_min": 344
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7996323529411765,
|
||
|
|
"grad_norm": 0.3034703265351846,
|
||
|
|
"learning_rate": 3.9980764616560544e-05,
|
||
|
|
"loss": 0.5564,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18241870403289795,
|
||
|
|
"step": 145,
|
||
|
|
"valid_targets_mean": 9520.5,
|
||
|
|
"valid_targets_min": 1223
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8272058823529411,
|
||
|
|
"grad_norm": 0.3259015215149292,
|
||
|
|
"learning_rate": 3.9966867885462854e-05,
|
||
|
|
"loss": 0.5555,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19907459616661072,
|
||
|
|
"step": 150,
|
||
|
|
"valid_targets_mean": 11470.9,
|
||
|
|
"valid_targets_min": 3448
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8547794117647058,
|
||
|
|
"grad_norm": 0.3279486360331197,
|
||
|
|
"learning_rate": 3.994921992461797e-05,
|
||
|
|
"loss": 0.5461,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18447428941726685,
|
||
|
|
"step": 155,
|
||
|
|
"valid_targets_mean": 9752.1,
|
||
|
|
"valid_targets_min": 3827
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8823529411764706,
|
||
|
|
"grad_norm": 0.3415570189377227,
|
||
|
|
"learning_rate": 3.992782404959627e-05,
|
||
|
|
"loss": 0.5485,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18221747875213623,
|
||
|
|
"step": 160,
|
||
|
|
"valid_targets_mean": 9179.8,
|
||
|
|
"valid_targets_min": 1780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9099264705882353,
|
||
|
|
"grad_norm": 0.39579278374013543,
|
||
|
|
"learning_rate": 3.9902684280098965e-05,
|
||
|
|
"loss": 0.5476,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18513306975364685,
|
||
|
|
"step": 165,
|
||
|
|
"valid_targets_mean": 10199.6,
|
||
|
|
"valid_targets_min": 2157
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9375,
|
||
|
|
"grad_norm": 0.2798313329426632,
|
||
|
|
"learning_rate": 3.987380533920287e-05,
|
||
|
|
"loss": 0.5392,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.2026323676109314,
|
||
|
|
"step": 170,
|
||
|
|
"valid_targets_mean": 11315.1,
|
||
|
|
"valid_targets_min": 6090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9650735294117647,
|
||
|
|
"grad_norm": 0.3493942306576815,
|
||
|
|
"learning_rate": 3.984119265247314e-05,
|
||
|
|
"loss": 0.5474,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15390989184379578,
|
||
|
|
"step": 175,
|
||
|
|
"valid_targets_mean": 8281.4,
|
||
|
|
"valid_targets_min": 2125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9926470588235294,
|
||
|
|
"grad_norm": 0.36521635704700256,
|
||
|
|
"learning_rate": 3.9804852346943866e-05,
|
||
|
|
"loss": 0.5426,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18721234798431396,
|
||
|
|
"step": 180,
|
||
|
|
"valid_targets_mean": 10394.0,
|
||
|
|
"valid_targets_min": 2057
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0165441176470589,
|
||
|
|
"grad_norm": 0.30506233144116673,
|
||
|
|
"learning_rate": 3.9764791249967044e-05,
|
||
|
|
"loss": 0.5428,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19133886694908142,
|
||
|
|
"step": 185,
|
||
|
|
"valid_targets_mean": 10289.3,
|
||
|
|
"valid_targets_min": 2465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0441176470588236,
|
||
|
|
"grad_norm": 0.2910843215873972,
|
||
|
|
"learning_rate": 3.972101688792986e-05,
|
||
|
|
"loss": 0.5312,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1826484203338623,
|
||
|
|
"step": 190,
|
||
|
|
"valid_targets_mean": 9581.6,
|
||
|
|
"valid_targets_min": 1460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0716911764705883,
|
||
|
|
"grad_norm": 0.2755584276978832,
|
||
|
|
"learning_rate": 3.967353748484071e-05,
|
||
|
|
"loss": 0.5344,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18369868397712708,
|
||
|
|
"step": 195,
|
||
|
|
"valid_targets_mean": 10337.5,
|
||
|
|
"valid_targets_min": 2688
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.099264705882353,
|
||
|
|
"grad_norm": 0.359251780614338,
|
||
|
|
"learning_rate": 3.962236196078411e-05,
|
||
|
|
"loss": 0.5225,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15699255466461182,
|
||
|
|
"step": 200,
|
||
|
|
"valid_targets_mean": 9281.1,
|
||
|
|
"valid_targets_min": 1589
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1268382352941178,
|
||
|
|
"grad_norm": 0.27720289986260105,
|
||
|
|
"learning_rate": 3.956749993024489e-05,
|
||
|
|
"loss": 0.5257,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.140591099858284,
|
||
|
|
"step": 205,
|
||
|
|
"valid_targets_mean": 7799.2,
|
||
|
|
"valid_targets_min": 1608
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1544117647058822,
|
||
|
|
"grad_norm": 0.26983976305045465,
|
||
|
|
"learning_rate": 3.950896170030186e-05,
|
||
|
|
"loss": 0.5259,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17038512229919434,
|
||
|
|
"step": 210,
|
||
|
|
"valid_targets_mean": 9629.8,
|
||
|
|
"valid_targets_min": 2130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.181985294117647,
|
||
|
|
"grad_norm": 0.2946999679550615,
|
||
|
|
"learning_rate": 3.9446758268691395e-05,
|
||
|
|
"loss": 0.5268,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18730860948562622,
|
||
|
|
"step": 215,
|
||
|
|
"valid_targets_mean": 9681.8,
|
||
|
|
"valid_targets_min": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2095588235294117,
|
||
|
|
"grad_norm": 0.299840004738946,
|
||
|
|
"learning_rate": 3.9380901321741315e-05,
|
||
|
|
"loss": 0.5185,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17023658752441406,
|
||
|
|
"step": 220,
|
||
|
|
"valid_targets_mean": 9639.6,
|
||
|
|
"valid_targets_min": 3931
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2371323529411764,
|
||
|
|
"grad_norm": 0.2966956776393768,
|
||
|
|
"learning_rate": 3.931140323217524e-05,
|
||
|
|
"loss": 0.526,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16511359810829163,
|
||
|
|
"step": 225,
|
||
|
|
"valid_targets_mean": 9019.7,
|
||
|
|
"valid_targets_min": 1681
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2647058823529411,
|
||
|
|
"grad_norm": 0.2881134761116685,
|
||
|
|
"learning_rate": 3.923827705678818e-05,
|
||
|
|
"loss": 0.5219,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17622773349285126,
|
||
|
|
"step": 230,
|
||
|
|
"valid_targets_mean": 9761.3,
|
||
|
|
"valid_targets_min": 1943
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2922794117647058,
|
||
|
|
"grad_norm": 0.3211671842301337,
|
||
|
|
"learning_rate": 3.916153653399352e-05,
|
||
|
|
"loss": 0.5215,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17426463961601257,
|
||
|
|
"step": 235,
|
||
|
|
"valid_targets_mean": 9748.3,
|
||
|
|
"valid_targets_min": 2276
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3198529411764706,
|
||
|
|
"grad_norm": 0.32683328829954483,
|
||
|
|
"learning_rate": 3.908119608124184e-05,
|
||
|
|
"loss": 0.522,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16995075345039368,
|
||
|
|
"step": 240,
|
||
|
|
"valid_targets_mean": 9808.2,
|
||
|
|
"valid_targets_min": 3060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3474264705882353,
|
||
|
|
"grad_norm": 0.2885476814272769,
|
||
|
|
"learning_rate": 3.8997270792312435e-05,
|
||
|
|
"loss": 0.5139,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.172532856464386,
|
||
|
|
"step": 245,
|
||
|
|
"valid_targets_mean": 9445.8,
|
||
|
|
"valid_targets_min": 1887
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.375,
|
||
|
|
"grad_norm": 0.40619327872879724,
|
||
|
|
"learning_rate": 3.890977643447746e-05,
|
||
|
|
"loss": 0.5224,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16190695762634277,
|
||
|
|
"step": 250,
|
||
|
|
"valid_targets_mean": 9265.1,
|
||
|
|
"valid_targets_min": 4316
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4025735294117647,
|
||
|
|
"grad_norm": 0.3079521022594924,
|
||
|
|
"learning_rate": 3.8818729445539765e-05,
|
||
|
|
"loss": 0.5096,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14533713459968567,
|
||
|
|
"step": 255,
|
||
|
|
"valid_targets_mean": 8282.3,
|
||
|
|
"valid_targets_min": 2412
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4301470588235294,
|
||
|
|
"grad_norm": 0.31036078222548275,
|
||
|
|
"learning_rate": 3.872414693074466e-05,
|
||
|
|
"loss": 0.5148,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1640068143606186,
|
||
|
|
"step": 260,
|
||
|
|
"valid_targets_mean": 9684.3,
|
||
|
|
"valid_targets_min": 3184
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4577205882352942,
|
||
|
|
"grad_norm": 0.26541664634789813,
|
||
|
|
"learning_rate": 3.862604665956632e-05,
|
||
|
|
"loss": 0.5115,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17487174272537231,
|
||
|
|
"step": 265,
|
||
|
|
"valid_targets_mean": 10682.2,
|
||
|
|
"valid_targets_min": 4848
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4852941176470589,
|
||
|
|
"grad_norm": 0.28874244943207317,
|
||
|
|
"learning_rate": 3.8524447062369355e-05,
|
||
|
|
"loss": 0.5134,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16116756200790405,
|
||
|
|
"step": 270,
|
||
|
|
"valid_targets_mean": 9613.4,
|
||
|
|
"valid_targets_min": 1794
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5128676470588234,
|
||
|
|
"grad_norm": 0.3043371506396099,
|
||
|
|
"learning_rate": 3.8419367226946286e-05,
|
||
|
|
"loss": 0.5167,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1825055181980133,
|
||
|
|
"step": 275,
|
||
|
|
"valid_targets_mean": 11411.1,
|
||
|
|
"valid_targets_min": 3611
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5404411764705883,
|
||
|
|
"grad_norm": 0.3223108606843313,
|
||
|
|
"learning_rate": 3.831082689493143e-05,
|
||
|
|
"loss": 0.5176,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16505330801010132,
|
||
|
|
"step": 280,
|
||
|
|
"valid_targets_mean": 9131.8,
|
||
|
|
"valid_targets_min": 2184
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5680147058823528,
|
||
|
|
"grad_norm": 0.27729135718776116,
|
||
|
|
"learning_rate": 3.819884645809203e-05,
|
||
|
|
"loss": 0.5147,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16858059167861938,
|
||
|
|
"step": 285,
|
||
|
|
"valid_targets_mean": 9615.7,
|
||
|
|
"valid_targets_min": 1996
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5955882352941178,
|
||
|
|
"grad_norm": 0.3660763161980808,
|
||
|
|
"learning_rate": 3.808344695449715e-05,
|
||
|
|
"loss": 0.5088,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1702558845281601,
|
||
|
|
"step": 290,
|
||
|
|
"valid_targets_mean": 10235.8,
|
||
|
|
"valid_targets_min": 4142
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6231617647058822,
|
||
|
|
"grad_norm": 0.3041037414824209,
|
||
|
|
"learning_rate": 3.796465006456523e-05,
|
||
|
|
"loss": 0.5065,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1646835058927536,
|
||
|
|
"step": 295,
|
||
|
|
"valid_targets_mean": 9766.0,
|
||
|
|
"valid_targets_min": 1557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6507352941176472,
|
||
|
|
"grad_norm": 0.3198950333330496,
|
||
|
|
"learning_rate": 3.784247810699093e-05,
|
||
|
|
"loss": 0.5101,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18033772706985474,
|
||
|
|
"step": 300,
|
||
|
|
"valid_targets_mean": 10882.1,
|
||
|
|
"valid_targets_min": 3133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6783088235294117,
|
||
|
|
"grad_norm": 0.2915834656120734,
|
||
|
|
"learning_rate": 3.7716954034552004e-05,
|
||
|
|
"loss": 0.5113,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17557981610298157,
|
||
|
|
"step": 305,
|
||
|
|
"valid_targets_mean": 10674.8,
|
||
|
|
"valid_targets_min": 1627
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7058823529411766,
|
||
|
|
"grad_norm": 0.3586758923774135,
|
||
|
|
"learning_rate": 3.758810142979719e-05,
|
||
|
|
"loss": 0.5087,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17392417788505554,
|
||
|
|
"step": 310,
|
||
|
|
"valid_targets_mean": 9954.1,
|
||
|
|
"valid_targets_min": 1356
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7334558823529411,
|
||
|
|
"grad_norm": 0.2645346812549258,
|
||
|
|
"learning_rate": 3.74559445006156e-05,
|
||
|
|
"loss": 0.5157,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17819076776504517,
|
||
|
|
"step": 315,
|
||
|
|
"valid_targets_mean": 10323.9,
|
||
|
|
"valid_targets_min": 2361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7610294117647058,
|
||
|
|
"grad_norm": 0.25271889404246967,
|
||
|
|
"learning_rate": 3.732050807568878e-05,
|
||
|
|
"loss": 0.5066,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17629846930503845,
|
||
|
|
"step": 320,
|
||
|
|
"valid_targets_mean": 11451.3,
|
||
|
|
"valid_targets_min": 1489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7886029411764706,
|
||
|
|
"grad_norm": 0.2763437959725264,
|
||
|
|
"learning_rate": 3.718181759982604e-05,
|
||
|
|
"loss": 0.5073,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1563217043876648,
|
||
|
|
"step": 325,
|
||
|
|
"valid_targets_mean": 8785.6,
|
||
|
|
"valid_targets_min": 2129
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8161764705882353,
|
||
|
|
"grad_norm": 0.30042350536966067,
|
||
|
|
"learning_rate": 3.703989912918409e-05,
|
||
|
|
"loss": 0.5054,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.176944762468338,
|
||
|
|
"step": 330,
|
||
|
|
"valid_targets_mean": 10494.3,
|
||
|
|
"valid_targets_min": 2765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.84375,
|
||
|
|
"grad_norm": 0.3378004054537919,
|
||
|
|
"learning_rate": 3.689477932637181e-05,
|
||
|
|
"loss": 0.5034,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1579429805278778,
|
||
|
|
"step": 335,
|
||
|
|
"valid_targets_mean": 9586.9,
|
||
|
|
"valid_targets_min": 1283
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8713235294117647,
|
||
|
|
"grad_norm": 0.2679674790343664,
|
||
|
|
"learning_rate": 3.674648545544104e-05,
|
||
|
|
"loss": 0.5077,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.178257018327713,
|
||
|
|
"step": 340,
|
||
|
|
"valid_targets_mean": 9507.9,
|
||
|
|
"valid_targets_min": 2612
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8988970588235294,
|
||
|
|
"grad_norm": 0.2579462646319047,
|
||
|
|
"learning_rate": 3.659504537676444e-05,
|
||
|
|
"loss": 0.4975,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15231987833976746,
|
||
|
|
"step": 345,
|
||
|
|
"valid_targets_mean": 9333.8,
|
||
|
|
"valid_targets_min": 1719
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9264705882352942,
|
||
|
|
"grad_norm": 0.2749601553036317,
|
||
|
|
"learning_rate": 3.6440487541801246e-05,
|
||
|
|
"loss": 0.4995,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16826336085796356,
|
||
|
|
"step": 350,
|
||
|
|
"valid_targets_mean": 9289.1,
|
||
|
|
"valid_targets_min": 2423
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9540441176470589,
|
||
|
|
"grad_norm": 0.27745508706156247,
|
||
|
|
"learning_rate": 3.628284098775207e-05,
|
||
|
|
"loss": 0.5038,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1490720808506012,
|
||
|
|
"step": 355,
|
||
|
|
"valid_targets_mean": 8659.3,
|
||
|
|
"valid_targets_min": 3197
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9816176470588234,
|
||
|
|
"grad_norm": 0.255832293559672,
|
||
|
|
"learning_rate": 3.612213533210356e-05,
|
||
|
|
"loss": 0.5071,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1693265736103058,
|
||
|
|
"step": 360,
|
||
|
|
"valid_targets_mean": 10030.6,
|
||
|
|
"valid_targets_min": 2085
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.005514705882353,
|
||
|
|
"grad_norm": 0.33130827465355267,
|
||
|
|
"learning_rate": 3.595840076706411e-05,
|
||
|
|
"loss": 0.5046,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15648218989372253,
|
||
|
|
"step": 365,
|
||
|
|
"valid_targets_mean": 9743.0,
|
||
|
|
"valid_targets_min": 1915
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0330882352941178,
|
||
|
|
"grad_norm": 0.3384282546059246,
|
||
|
|
"learning_rate": 3.579166805389154e-05,
|
||
|
|
"loss": 0.4962,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16740265488624573,
|
||
|
|
"step": 370,
|
||
|
|
"valid_targets_mean": 9421.0,
|
||
|
|
"valid_targets_min": 1529
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0606617647058822,
|
||
|
|
"grad_norm": 0.33622127319274503,
|
||
|
|
"learning_rate": 3.562196851711391e-05,
|
||
|
|
"loss": 0.4878,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16401368379592896,
|
||
|
|
"step": 375,
|
||
|
|
"valid_targets_mean": 8955.2,
|
||
|
|
"valid_targets_min": 1538
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.088235294117647,
|
||
|
|
"grad_norm": 0.2894449977451741,
|
||
|
|
"learning_rate": 3.5449334038644515e-05,
|
||
|
|
"loss": 0.5018,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16942408680915833,
|
||
|
|
"step": 380,
|
||
|
|
"valid_targets_mean": 9713.6,
|
||
|
|
"valid_targets_min": 1919
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1158088235294117,
|
||
|
|
"grad_norm": 0.34751761847543067,
|
||
|
|
"learning_rate": 3.5273797051792114e-05,
|
||
|
|
"loss": 0.4948,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1818588376045227,
|
||
|
|
"step": 385,
|
||
|
|
"valid_targets_mean": 10809.2,
|
||
|
|
"valid_targets_min": 4108
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1433823529411766,
|
||
|
|
"grad_norm": 0.2635161588753612,
|
||
|
|
"learning_rate": 3.509539053516759e-05,
|
||
|
|
"loss": 0.4966,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16954530775547028,
|
||
|
|
"step": 390,
|
||
|
|
"valid_targets_mean": 10632.2,
|
||
|
|
"valid_targets_min": 1764
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.170955882352941,
|
||
|
|
"grad_norm": 0.24420082473821403,
|
||
|
|
"learning_rate": 3.49141480064882e-05,
|
||
|
|
"loss": 0.4982,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17877304553985596,
|
||
|
|
"step": 395,
|
||
|
|
"valid_targets_mean": 11006.1,
|
||
|
|
"valid_targets_min": 3599
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.198529411764706,
|
||
|
|
"grad_norm": 0.27155049413143767,
|
||
|
|
"learning_rate": 3.47301035162805e-05,
|
||
|
|
"loss": 0.4882,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16475136578083038,
|
||
|
|
"step": 400,
|
||
|
|
"valid_targets_mean": 9823.3,
|
||
|
|
"valid_targets_min": 2475
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2261029411764706,
|
||
|
|
"grad_norm": 0.3287310658697828,
|
||
|
|
"learning_rate": 3.454329164148317e-05,
|
||
|
|
"loss": 0.4965,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16053670644760132,
|
||
|
|
"step": 405,
|
||
|
|
"valid_targets_mean": 9885.7,
|
||
|
|
"valid_targets_min": 3454
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2536764705882355,
|
||
|
|
"grad_norm": 0.2784404641293258,
|
||
|
|
"learning_rate": 3.435374747895095e-05,
|
||
|
|
"loss": 0.4873,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16837987303733826,
|
||
|
|
"step": 410,
|
||
|
|
"valid_targets_mean": 9935.2,
|
||
|
|
"valid_targets_min": 1837
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.28125,
|
||
|
|
"grad_norm": 0.28151764792692086,
|
||
|
|
"learning_rate": 3.4161506638860903e-05,
|
||
|
|
"loss": 0.4956,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15354721248149872,
|
||
|
|
"step": 415,
|
||
|
|
"valid_targets_mean": 9233.6,
|
||
|
|
"valid_targets_min": 1381
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3088235294117645,
|
||
|
|
"grad_norm": 0.3543003365756421,
|
||
|
|
"learning_rate": 3.396660523802225e-05,
|
||
|
|
"loss": 0.4878,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15362058579921722,
|
||
|
|
"step": 420,
|
||
|
|
"valid_targets_mean": 9784.1,
|
||
|
|
"valid_targets_min": 2502
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3363970588235294,
|
||
|
|
"grad_norm": 0.3459844290219232,
|
||
|
|
"learning_rate": 3.376907989309097e-05,
|
||
|
|
"loss": 0.4898,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16421331465244293,
|
||
|
|
"step": 425,
|
||
|
|
"valid_targets_mean": 9645.9,
|
||
|
|
"valid_targets_min": 1466
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.363970588235294,
|
||
|
|
"grad_norm": 0.2867558328265102,
|
||
|
|
"learning_rate": 3.3568967713690574e-05,
|
||
|
|
"loss": 0.4911,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16321928799152374,
|
||
|
|
"step": 430,
|
||
|
|
"valid_targets_mean": 9835.3,
|
||
|
|
"valid_targets_min": 2574
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.391544117647059,
|
||
|
|
"grad_norm": 0.26780085668465703,
|
||
|
|
"learning_rate": 3.3366306295440195e-05,
|
||
|
|
"loss": 0.4835,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17828045785427094,
|
||
|
|
"step": 435,
|
||
|
|
"valid_targets_mean": 11611.0,
|
||
|
|
"valid_targets_min": 2718
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4191176470588234,
|
||
|
|
"grad_norm": 0.26518791813195564,
|
||
|
|
"learning_rate": 3.316113371289137e-05,
|
||
|
|
"loss": 0.4973,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15982230007648468,
|
||
|
|
"step": 440,
|
||
|
|
"valid_targets_mean": 9063.5,
|
||
|
|
"valid_targets_min": 3199
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4466911764705883,
|
||
|
|
"grad_norm": 0.28443782068251516,
|
||
|
|
"learning_rate": 3.295348851237494e-05,
|
||
|
|
"loss": 0.4926,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14535918831825256,
|
||
|
|
"step": 445,
|
||
|
|
"valid_targets_mean": 9235.6,
|
||
|
|
"valid_targets_min": 617
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.474264705882353,
|
||
|
|
"grad_norm": 0.3188266718733089,
|
||
|
|
"learning_rate": 3.2743409704759175e-05,
|
||
|
|
"loss": 0.495,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18030281364917755,
|
||
|
|
"step": 450,
|
||
|
|
"valid_targets_mean": 10848.3,
|
||
|
|
"valid_targets_min": 4332
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5018382352941178,
|
||
|
|
"grad_norm": 0.2667422692086372,
|
||
|
|
"learning_rate": 3.253093675812073e-05,
|
||
|
|
"loss": 0.488,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15864768624305725,
|
||
|
|
"step": 455,
|
||
|
|
"valid_targets_mean": 9596.5,
|
||
|
|
"valid_targets_min": 1920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5294117647058822,
|
||
|
|
"grad_norm": 0.2604029768908445,
|
||
|
|
"learning_rate": 3.231610959032968e-05,
|
||
|
|
"loss": 0.4885,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1746014654636383,
|
||
|
|
"step": 460,
|
||
|
|
"valid_targets_mean": 10420.3,
|
||
|
|
"valid_targets_min": 1764
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.556985294117647,
|
||
|
|
"grad_norm": 0.2858191680359426,
|
||
|
|
"learning_rate": 3.2098968561550024e-05,
|
||
|
|
"loss": 0.4868,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15824320912361145,
|
||
|
|
"step": 465,
|
||
|
|
"valid_targets_mean": 10062.4,
|
||
|
|
"valid_targets_min": 1489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5845588235294117,
|
||
|
|
"grad_norm": 0.2597776478065362,
|
||
|
|
"learning_rate": 3.18795544666571e-05,
|
||
|
|
"loss": 0.4875,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15228307247161865,
|
||
|
|
"step": 470,
|
||
|
|
"valid_targets_mean": 9501.3,
|
||
|
|
"valid_targets_min": 2570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6121323529411766,
|
||
|
|
"grad_norm": 0.2619195905683205,
|
||
|
|
"learning_rate": 3.1657908527573376e-05,
|
||
|
|
"loss": 0.489,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14361000061035156,
|
||
|
|
"step": 475,
|
||
|
|
"valid_targets_mean": 8306.0,
|
||
|
|
"valid_targets_min": 521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.639705882352941,
|
||
|
|
"grad_norm": 0.3061155537403774,
|
||
|
|
"learning_rate": 3.143407238552394e-05,
|
||
|
|
"loss": 0.4835,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13861876726150513,
|
||
|
|
"step": 480,
|
||
|
|
"valid_targets_mean": 9225.4,
|
||
|
|
"valid_targets_min": 344
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6672794117647056,
|
||
|
|
"grad_norm": 0.2602743554835305,
|
||
|
|
"learning_rate": 3.1208088093213276e-05,
|
||
|
|
"loss": 0.4882,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15896561741828918,
|
||
|
|
"step": 485,
|
||
|
|
"valid_targets_mean": 9260.0,
|
||
|
|
"valid_targets_min": 1839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6948529411764706,
|
||
|
|
"grad_norm": 0.2724505413952303,
|
||
|
|
"learning_rate": 3.097999810692468e-05,
|
||
|
|
"loss": 0.4825,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14052462577819824,
|
||
|
|
"step": 490,
|
||
|
|
"valid_targets_mean": 8514.9,
|
||
|
|
"valid_targets_min": 2227
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7224264705882355,
|
||
|
|
"grad_norm": 0.2662825428558882,
|
||
|
|
"learning_rate": 3.074984527854392e-05,
|
||
|
|
"loss": 0.492,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16768789291381836,
|
||
|
|
"step": 495,
|
||
|
|
"valid_targets_mean": 9108.5,
|
||
|
|
"valid_targets_min": 413
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.75,
|
||
|
|
"grad_norm": 0.2975150447539531,
|
||
|
|
"learning_rate": 3.0517672847508517e-05,
|
||
|
|
"loss": 0.4858,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15554597973823547,
|
||
|
|
"step": 500,
|
||
|
|
"valid_targets_mean": 9202.9,
|
||
|
|
"valid_targets_min": 967
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7775735294117645,
|
||
|
|
"grad_norm": 0.2550495273374524,
|
||
|
|
"learning_rate": 3.0283524432684214e-05,
|
||
|
|
"loss": 0.4909,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.178862527012825,
|
||
|
|
"step": 505,
|
||
|
|
"valid_targets_mean": 10239.5,
|
||
|
|
"valid_targets_min": 4379
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8051470588235294,
|
||
|
|
"grad_norm": 0.31261731562154393,
|
||
|
|
"learning_rate": 3.0047444024170197e-05,
|
||
|
|
"loss": 0.4781,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14991816878318787,
|
||
|
|
"step": 510,
|
||
|
|
"valid_targets_mean": 9316.9,
|
||
|
|
"valid_targets_min": 349
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8327205882352944,
|
||
|
|
"grad_norm": 0.30993641338002453,
|
||
|
|
"learning_rate": 2.9809475975034586e-05,
|
||
|
|
"loss": 0.4862,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16486816108226776,
|
||
|
|
"step": 515,
|
||
|
|
"valid_targets_mean": 10542.5,
|
||
|
|
"valid_targets_min": 4704
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.860294117647059,
|
||
|
|
"grad_norm": 0.2824856754358018,
|
||
|
|
"learning_rate": 2.9569664992981648e-05,
|
||
|
|
"loss": 0.4807,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15252447128295898,
|
||
|
|
"step": 520,
|
||
|
|
"valid_targets_mean": 9458.6,
|
||
|
|
"valid_targets_min": 2709
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8878676470588234,
|
||
|
|
"grad_norm": 0.2930565337599535,
|
||
|
|
"learning_rate": 2.932805613195249e-05,
|
||
|
|
"loss": 0.4843,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14687579870224,
|
||
|
|
"step": 525,
|
||
|
|
"valid_targets_mean": 8935.3,
|
||
|
|
"valid_targets_min": 2361
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9154411764705883,
|
||
|
|
"grad_norm": 0.30339081178623667,
|
||
|
|
"learning_rate": 2.9084694783660615e-05,
|
||
|
|
"loss": 0.4824,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.19605809450149536,
|
||
|
|
"step": 530,
|
||
|
|
"valid_targets_mean": 11474.3,
|
||
|
|
"valid_targets_min": 3566
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.943014705882353,
|
||
|
|
"grad_norm": 0.35774459464396025,
|
||
|
|
"learning_rate": 2.8839626669064073e-05,
|
||
|
|
"loss": 0.486,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15574738383293152,
|
||
|
|
"step": 535,
|
||
|
|
"valid_targets_mean": 9388.9,
|
||
|
|
"valid_targets_min": 1223
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9705882352941178,
|
||
|
|
"grad_norm": 0.23890534631672108,
|
||
|
|
"learning_rate": 2.8592897829775732e-05,
|
||
|
|
"loss": 0.4887,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1441933661699295,
|
||
|
|
"step": 540,
|
||
|
|
"valid_targets_mean": 9105.8,
|
||
|
|
"valid_targets_min": 2471
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9981617647058822,
|
||
|
|
"grad_norm": 0.2874190440574071,
|
||
|
|
"learning_rate": 2.8344554619413355e-05,
|
||
|
|
"loss": 0.486,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16626250743865967,
|
||
|
|
"step": 545,
|
||
|
|
"valid_targets_mean": 9902.8,
|
||
|
|
"valid_targets_min": 1834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0220588235294117,
|
||
|
|
"grad_norm": 0.2797379791583693,
|
||
|
|
"learning_rate": 2.8094643694890947e-05,
|
||
|
|
"loss": 0.4762,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15490218997001648,
|
||
|
|
"step": 550,
|
||
|
|
"valid_targets_mean": 9643.8,
|
||
|
|
"valid_targets_min": 2579
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0496323529411766,
|
||
|
|
"grad_norm": 0.2781063666088034,
|
||
|
|
"learning_rate": 2.784321200765326e-05,
|
||
|
|
"loss": 0.4829,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1765371710062027,
|
||
|
|
"step": 555,
|
||
|
|
"valid_targets_mean": 10938.4,
|
||
|
|
"valid_targets_min": 3219
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.077205882352941,
|
||
|
|
"grad_norm": 0.2633292268883383,
|
||
|
|
"learning_rate": 2.7590306794854853e-05,
|
||
|
|
"loss": 0.4862,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1536048948764801,
|
||
|
|
"step": 560,
|
||
|
|
"valid_targets_mean": 9472.4,
|
||
|
|
"valid_targets_min": 2616
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.104779411764706,
|
||
|
|
"grad_norm": 0.3024333218240714,
|
||
|
|
"learning_rate": 2.7335975570485552e-05,
|
||
|
|
"loss": 0.4809,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16254714131355286,
|
||
|
|
"step": 565,
|
||
|
|
"valid_targets_mean": 9964.5,
|
||
|
|
"valid_targets_min": 1829
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1323529411764706,
|
||
|
|
"grad_norm": 0.296651896630342,
|
||
|
|
"learning_rate": 2.7080266116443855e-05,
|
||
|
|
"loss": 0.4784,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1577146351337433,
|
||
|
|
"step": 570,
|
||
|
|
"valid_targets_mean": 8750.8,
|
||
|
|
"valid_targets_min": 698
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1599264705882355,
|
||
|
|
"grad_norm": 0.2731702178415364,
|
||
|
|
"learning_rate": 2.6823226473559992e-05,
|
||
|
|
"loss": 0.4783,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16758793592453003,
|
||
|
|
"step": 575,
|
||
|
|
"valid_targets_mean": 10279.2,
|
||
|
|
"valid_targets_min": 2019
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.1875,
|
||
|
|
"grad_norm": 0.30439545297509896,
|
||
|
|
"learning_rate": 2.656490493257042e-05,
|
||
|
|
"loss": 0.4725,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1514369696378708,
|
||
|
|
"step": 580,
|
||
|
|
"valid_targets_mean": 9135.0,
|
||
|
|
"valid_targets_min": 2521
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2150735294117645,
|
||
|
|
"grad_norm": 0.27128678116813276,
|
||
|
|
"learning_rate": 2.6305350025045257e-05,
|
||
|
|
"loss": 0.478,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16004298627376556,
|
||
|
|
"step": 585,
|
||
|
|
"valid_targets_mean": 9188.1,
|
||
|
|
"valid_targets_min": 1834
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.2426470588235294,
|
||
|
|
"grad_norm": 0.2915602972024743,
|
||
|
|
"learning_rate": 2.604461051427054e-05,
|
||
|
|
"loss": 0.4767,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1670864373445511,
|
||
|
|
"step": 590,
|
||
|
|
"valid_targets_mean": 9554.1,
|
||
|
|
"valid_targets_min": 2225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.270220588235294,
|
||
|
|
"grad_norm": 0.2652841967385897,
|
||
|
|
"learning_rate": 2.5782735386086954e-05,
|
||
|
|
"loss": 0.478,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18112346529960632,
|
||
|
|
"step": 595,
|
||
|
|
"valid_targets_mean": 10889.3,
|
||
|
|
"valid_targets_min": 2020
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.297794117647059,
|
||
|
|
"grad_norm": 0.29039039949321094,
|
||
|
|
"learning_rate": 2.5519773839686707e-05,
|
||
|
|
"loss": 0.4792,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16282935440540314,
|
||
|
|
"step": 600,
|
||
|
|
"valid_targets_mean": 9785.2,
|
||
|
|
"valid_targets_min": 2533
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3253676470588234,
|
||
|
|
"grad_norm": 0.3075493577268038,
|
||
|
|
"learning_rate": 2.525577527837036e-05,
|
||
|
|
"loss": 0.4755,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16678908467292786,
|
||
|
|
"step": 605,
|
||
|
|
"valid_targets_mean": 10208.7,
|
||
|
|
"valid_targets_min": 1585
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.3529411764705883,
|
||
|
|
"grad_norm": 0.28091651740846124,
|
||
|
|
"learning_rate": 2.4990789300265256e-05,
|
||
|
|
"loss": 0.4742,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15700221061706543,
|
||
|
|
"step": 610,
|
||
|
|
"valid_targets_mean": 9146.5,
|
||
|
|
"valid_targets_min": 2410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.380514705882353,
|
||
|
|
"grad_norm": 0.28102447688530885,
|
||
|
|
"learning_rate": 2.472486568900745e-05,
|
||
|
|
"loss": 0.4695,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15741194784641266,
|
||
|
|
"step": 615,
|
||
|
|
"valid_targets_mean": 9390.8,
|
||
|
|
"valid_targets_min": 1976
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4080882352941178,
|
||
|
|
"grad_norm": 0.33795109998469414,
|
||
|
|
"learning_rate": 2.445805440438866e-05,
|
||
|
|
"loss": 0.4796,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1518850028514862,
|
||
|
|
"step": 620,
|
||
|
|
"valid_targets_mean": 9751.7,
|
||
|
|
"valid_targets_min": 1398
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4356617647058822,
|
||
|
|
"grad_norm": 0.2899570887713248,
|
||
|
|
"learning_rate": 2.419040557297024e-05,
|
||
|
|
"loss": 0.4784,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15818291902542114,
|
||
|
|
"step": 625,
|
||
|
|
"valid_targets_mean": 9005.8,
|
||
|
|
"valid_targets_min": 1875
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.463235294117647,
|
||
|
|
"grad_norm": 0.27113682924469873,
|
||
|
|
"learning_rate": 2.3921969478665702e-05,
|
||
|
|
"loss": 0.4742,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14796185493469238,
|
||
|
|
"step": 630,
|
||
|
|
"valid_targets_mean": 8848.4,
|
||
|
|
"valid_targets_min": 1331
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.4908088235294117,
|
||
|
|
"grad_norm": 0.30195562694475775,
|
||
|
|
"learning_rate": 2.3652796553293794e-05,
|
||
|
|
"loss": 0.4707,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1724786013364792,
|
||
|
|
"step": 635,
|
||
|
|
"valid_targets_mean": 10914.3,
|
||
|
|
"valid_targets_min": 2947
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5183823529411766,
|
||
|
|
"grad_norm": 0.27090220964326883,
|
||
|
|
"learning_rate": 2.338293736710373e-05,
|
||
|
|
"loss": 0.4748,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1456066370010376,
|
||
|
|
"step": 640,
|
||
|
|
"valid_targets_mean": 8925.1,
|
||
|
|
"valid_targets_min": 617
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.545955882352941,
|
||
|
|
"grad_norm": 0.2644791412793208,
|
||
|
|
"learning_rate": 2.3112442619274408e-05,
|
||
|
|
"loss": 0.4756,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1511625200510025,
|
||
|
|
"step": 645,
|
||
|
|
"valid_targets_mean": 9793.2,
|
||
|
|
"valid_targets_min": 1724
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.5735294117647056,
|
||
|
|
"grad_norm": 0.25813430075433286,
|
||
|
|
"learning_rate": 2.2841363128389388e-05,
|
||
|
|
"loss": 0.4839,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16180767118930817,
|
||
|
|
"step": 650,
|
||
|
|
"valid_targets_mean": 10196.3,
|
||
|
|
"valid_targets_min": 2718
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6011029411764706,
|
||
|
|
"grad_norm": 0.27785983668454983,
|
||
|
|
"learning_rate": 2.2569749822889526e-05,
|
||
|
|
"loss": 0.4758,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1579504758119583,
|
||
|
|
"step": 655,
|
||
|
|
"valid_targets_mean": 8632.2,
|
||
|
|
"valid_targets_min": 1381
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6286764705882355,
|
||
|
|
"grad_norm": 0.29111353881691315,
|
||
|
|
"learning_rate": 2.229765373150489e-05,
|
||
|
|
"loss": 0.4719,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15923120081424713,
|
||
|
|
"step": 660,
|
||
|
|
"valid_targets_mean": 9744.8,
|
||
|
|
"valid_targets_min": 1369
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.65625,
|
||
|
|
"grad_norm": 0.25730565829379,
|
||
|
|
"learning_rate": 2.2025125973667817e-05,
|
||
|
|
"loss": 0.4679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13490340113639832,
|
||
|
|
"step": 665,
|
||
|
|
"valid_targets_mean": 8098.9,
|
||
|
|
"valid_targets_min": 1119
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.6838235294117645,
|
||
|
|
"grad_norm": 0.25083718631280677,
|
||
|
|
"learning_rate": 2.1752217749908997e-05,
|
||
|
|
"loss": 0.4714,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16616028547286987,
|
||
|
|
"step": 670,
|
||
|
|
"valid_targets_mean": 10727.1,
|
||
|
|
"valid_targets_min": 1686
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7113970588235294,
|
||
|
|
"grad_norm": 0.29118617817633485,
|
||
|
|
"learning_rate": 2.147898033223831e-05,
|
||
|
|
"loss": 0.4709,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1437336653470993,
|
||
|
|
"step": 675,
|
||
|
|
"valid_targets_mean": 8196.6,
|
||
|
|
"valid_targets_min": 643
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7389705882352944,
|
||
|
|
"grad_norm": 0.2868072475645195,
|
||
|
|
"learning_rate": 2.120546505451218e-05,
|
||
|
|
"loss": 0.4716,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1592090129852295,
|
||
|
|
"step": 680,
|
||
|
|
"valid_targets_mean": 9257.1,
|
||
|
|
"valid_targets_min": 2425
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.766544117647059,
|
||
|
|
"grad_norm": 0.2646065431597373,
|
||
|
|
"learning_rate": 2.0931723302789346e-05,
|
||
|
|
"loss": 0.4739,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15621066093444824,
|
||
|
|
"step": 685,
|
||
|
|
"valid_targets_mean": 9754.2,
|
||
|
|
"valid_targets_min": 3945
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.7941176470588234,
|
||
|
|
"grad_norm": 0.27437618997704516,
|
||
|
|
"learning_rate": 2.065780650567683e-05,
|
||
|
|
"loss": 0.474,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14808349311351776,
|
||
|
|
"step": 690,
|
||
|
|
"valid_targets_mean": 9839.2,
|
||
|
|
"valid_targets_min": 4592
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8216911764705883,
|
||
|
|
"grad_norm": 0.24821319026154287,
|
||
|
|
"learning_rate": 2.038376612466793e-05,
|
||
|
|
"loss": 0.4742,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16178636252880096,
|
||
|
|
"step": 695,
|
||
|
|
"valid_targets_mean": 11111.1,
|
||
|
|
"valid_targets_min": 1996
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.849264705882353,
|
||
|
|
"grad_norm": 0.2465619978535496,
|
||
|
|
"learning_rate": 2.0109653644473966e-05,
|
||
|
|
"loss": 0.471,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1422886848449707,
|
||
|
|
"step": 700,
|
||
|
|
"valid_targets_mean": 10060.5,
|
||
|
|
"valid_targets_min": 313
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.8768382352941178,
|
||
|
|
"grad_norm": 0.3119293770555057,
|
||
|
|
"learning_rate": 1.9835520563351735e-05,
|
||
|
|
"loss": 0.468,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1610221415758133,
|
||
|
|
"step": 705,
|
||
|
|
"valid_targets_mean": 10060.2,
|
||
|
|
"valid_targets_min": 1469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9044117647058822,
|
||
|
|
"grad_norm": 0.27494281301179807,
|
||
|
|
"learning_rate": 1.9561418383428374e-05,
|
||
|
|
"loss": 0.4752,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.162495955824852,
|
||
|
|
"step": 710,
|
||
|
|
"valid_targets_mean": 9654.4,
|
||
|
|
"valid_targets_min": 4185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.931985294117647,
|
||
|
|
"grad_norm": 0.26008772339215747,
|
||
|
|
"learning_rate": 1.9287398601025562e-05,
|
||
|
|
"loss": 0.4705,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15456292033195496,
|
||
|
|
"step": 715,
|
||
|
|
"valid_targets_mean": 9493.3,
|
||
|
|
"valid_targets_min": 2208
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9595588235294117,
|
||
|
|
"grad_norm": 0.271085036968312,
|
||
|
|
"learning_rate": 1.9013512696984696e-05,
|
||
|
|
"loss": 0.4689,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1571996957063675,
|
||
|
|
"step": 720,
|
||
|
|
"valid_targets_mean": 9132.5,
|
||
|
|
"valid_targets_min": 1567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.9871323529411766,
|
||
|
|
"grad_norm": 0.2411363224005689,
|
||
|
|
"learning_rate": 1.8739812126995093e-05,
|
||
|
|
"loss": 0.4722,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14264698326587677,
|
||
|
|
"step": 725,
|
||
|
|
"valid_targets_mean": 9456.3,
|
||
|
|
"valid_targets_min": 1832
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.011029411764706,
|
||
|
|
"grad_norm": 0.2359597961140309,
|
||
|
|
"learning_rate": 1.8466348311926863e-05,
|
||
|
|
"loss": 0.466,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15933065116405487,
|
||
|
|
"step": 730,
|
||
|
|
"valid_targets_mean": 10693.0,
|
||
|
|
"valid_targets_min": 5400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.038602941176471,
|
||
|
|
"grad_norm": 0.26309525718508847,
|
||
|
|
"learning_rate": 1.8193172628170324e-05,
|
||
|
|
"loss": 0.4682,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15065504610538483,
|
||
|
|
"step": 735,
|
||
|
|
"valid_targets_mean": 10060.2,
|
||
|
|
"valid_targets_min": 505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.0661764705882355,
|
||
|
|
"grad_norm": 0.2473581577370302,
|
||
|
|
"learning_rate": 1.792033639798377e-05,
|
||
|
|
"loss": 0.4703,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1632881462574005,
|
||
|
|
"step": 740,
|
||
|
|
"valid_targets_mean": 9911.9,
|
||
|
|
"valid_targets_min": 1738
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.09375,
|
||
|
|
"grad_norm": 0.2565338212535585,
|
||
|
|
"learning_rate": 1.764789087985145e-05,
|
||
|
|
"loss": 0.465,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13955152034759521,
|
||
|
|
"step": 745,
|
||
|
|
"valid_targets_mean": 8719.0,
|
||
|
|
"valid_targets_min": 835
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.1213235294117645,
|
||
|
|
"grad_norm": 0.29464540307392384,
|
||
|
|
"learning_rate": 1.737588725885345e-05,
|
||
|
|
"loss": 0.4625,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15189291536808014,
|
||
|
|
"step": 750,
|
||
|
|
"valid_targets_mean": 9459.8,
|
||
|
|
"valid_targets_min": 2465
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.148897058823529,
|
||
|
|
"grad_norm": 0.29502353988540564,
|
||
|
|
"learning_rate": 1.7104376637049474e-05,
|
||
|
|
"loss": 0.4647,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13400202989578247,
|
||
|
|
"step": 755,
|
||
|
|
"valid_targets_mean": 8704.5,
|
||
|
|
"valid_targets_min": 1765
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.176470588235294,
|
||
|
|
"grad_norm": 0.2954768301391897,
|
||
|
|
"learning_rate": 1.6833410023878104e-05,
|
||
|
|
"loss": 0.4711,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15495823323726654,
|
||
|
|
"step": 760,
|
||
|
|
"valid_targets_mean": 10454.3,
|
||
|
|
"valid_targets_min": 1863
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.204044117647059,
|
||
|
|
"grad_norm": 0.2420606644014351,
|
||
|
|
"learning_rate": 1.6563038326573544e-05,
|
||
|
|
"loss": 0.4664,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14864429831504822,
|
||
|
|
"step": 765,
|
||
|
|
"valid_targets_mean": 9574.8,
|
||
|
|
"valid_targets_min": 2267
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.231617647058823,
|
||
|
|
"grad_norm": 0.22980463517846378,
|
||
|
|
"learning_rate": 1.6293312340601545e-05,
|
||
|
|
"loss": 0.4675,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.168225958943367,
|
||
|
|
"step": 770,
|
||
|
|
"valid_targets_mean": 11102.2,
|
||
|
|
"valid_targets_min": 3249
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.259191176470588,
|
||
|
|
"grad_norm": 0.2360042458287228,
|
||
|
|
"learning_rate": 1.60242827401163e-05,
|
||
|
|
"loss": 0.4629,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1540679782629013,
|
||
|
|
"step": 775,
|
||
|
|
"valid_targets_mean": 9416.9,
|
||
|
|
"valid_targets_min": 455
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.286764705882353,
|
||
|
|
"grad_norm": 0.24859722691681074,
|
||
|
|
"learning_rate": 1.5756000068440184e-05,
|
||
|
|
"loss": 0.4595,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14932796359062195,
|
||
|
|
"step": 780,
|
||
|
|
"valid_targets_mean": 9871.3,
|
||
|
|
"valid_targets_min": 1989
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.314338235294118,
|
||
|
|
"grad_norm": 0.24874113072111292,
|
||
|
|
"learning_rate": 1.548851472856802e-05,
|
||
|
|
"loss": 0.4659,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.12739743292331696,
|
||
|
|
"step": 785,
|
||
|
|
"valid_targets_mean": 8745.9,
|
||
|
|
"valid_targets_min": 1265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.341911764705882,
|
||
|
|
"grad_norm": 0.2616979627725329,
|
||
|
|
"learning_rate": 1.5221876973697729e-05,
|
||
|
|
"loss": 0.4675,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16212889552116394,
|
||
|
|
"step": 790,
|
||
|
|
"valid_targets_mean": 9819.2,
|
||
|
|
"valid_targets_min": 1922
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.369485294117647,
|
||
|
|
"grad_norm": 0.2605902813842398,
|
||
|
|
"learning_rate": 1.4956136897789155e-05,
|
||
|
|
"loss": 0.4682,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1644110381603241,
|
||
|
|
"step": 795,
|
||
|
|
"valid_targets_mean": 10543.6,
|
||
|
|
"valid_targets_min": 2238
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.397058823529412,
|
||
|
|
"grad_norm": 0.27655005722080517,
|
||
|
|
"learning_rate": 1.4691344426152733e-05,
|
||
|
|
"loss": 0.4684,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13570016622543335,
|
||
|
|
"step": 800,
|
||
|
|
"valid_targets_mean": 7597.6,
|
||
|
|
"valid_targets_min": 349
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.424632352941177,
|
||
|
|
"grad_norm": 0.2681286272432588,
|
||
|
|
"learning_rate": 1.4427549306069915e-05,
|
||
|
|
"loss": 0.468,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17025525867938995,
|
||
|
|
"step": 805,
|
||
|
|
"valid_targets_mean": 10451.5,
|
||
|
|
"valid_targets_min": 2785
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.452205882352941,
|
||
|
|
"grad_norm": 0.25262683253593055,
|
||
|
|
"learning_rate": 1.416480109744701e-05,
|
||
|
|
"loss": 0.4658,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14067339897155762,
|
||
|
|
"step": 810,
|
||
|
|
"valid_targets_mean": 9118.7,
|
||
|
|
"valid_targets_min": 375
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.479779411764706,
|
||
|
|
"grad_norm": 0.2833620837271674,
|
||
|
|
"learning_rate": 1.3903149163504221e-05,
|
||
|
|
"loss": 0.4733,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17299406230449677,
|
||
|
|
"step": 815,
|
||
|
|
"valid_targets_mean": 9922.2,
|
||
|
|
"valid_targets_min": 2029
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.507352941176471,
|
||
|
|
"grad_norm": 0.28237169634387094,
|
||
|
|
"learning_rate": 1.3642642661501641e-05,
|
||
|
|
"loss": 0.4637,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1708087921142578,
|
||
|
|
"step": 820,
|
||
|
|
"valid_targets_mean": 10745.8,
|
||
|
|
"valid_targets_min": 4792
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5349264705882355,
|
||
|
|
"grad_norm": 0.24345906344118387,
|
||
|
|
"learning_rate": 1.3383330533503971e-05,
|
||
|
|
"loss": 0.4705,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15734925866127014,
|
||
|
|
"step": 825,
|
||
|
|
"valid_targets_mean": 9852.8,
|
||
|
|
"valid_targets_min": 2881
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5625,
|
||
|
|
"grad_norm": 0.24266003465888375,
|
||
|
|
"learning_rate": 1.3125261497185588e-05,
|
||
|
|
"loss": 0.4646,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14917130768299103,
|
||
|
|
"step": 830,
|
||
|
|
"valid_targets_mean": 10095.4,
|
||
|
|
"valid_targets_min": 1468
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.5900735294117645,
|
||
|
|
"grad_norm": 0.24693814616775356,
|
||
|
|
"learning_rate": 1.2868484036677896e-05,
|
||
|
|
"loss": 0.4676,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14834946393966675,
|
||
|
|
"step": 835,
|
||
|
|
"valid_targets_mean": 9061.1,
|
||
|
|
"valid_targets_min": 1531
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.617647058823529,
|
||
|
|
"grad_norm": 0.25767151072786965,
|
||
|
|
"learning_rate": 1.2613046393460411e-05,
|
||
|
|
"loss": 0.4694,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1626255214214325,
|
||
|
|
"step": 840,
|
||
|
|
"valid_targets_mean": 9341.8,
|
||
|
|
"valid_targets_min": 2334
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.645220588235294,
|
||
|
|
"grad_norm": 0.2523502952262432,
|
||
|
|
"learning_rate": 1.2358996557297532e-05,
|
||
|
|
"loss": 0.4685,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14751945436000824,
|
||
|
|
"step": 845,
|
||
|
|
"valid_targets_mean": 9205.8,
|
||
|
|
"valid_targets_min": 2278
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.672794117647059,
|
||
|
|
"grad_norm": 0.2671934827994343,
|
||
|
|
"learning_rate": 1.2106382257222595e-05,
|
||
|
|
"loss": 0.4657,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1465722620487213,
|
||
|
|
"step": 850,
|
||
|
|
"valid_targets_mean": 9662.6,
|
||
|
|
"valid_targets_min": 818
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.700367647058823,
|
||
|
|
"grad_norm": 0.2678177528324946,
|
||
|
|
"learning_rate": 1.1855250952570852e-05,
|
||
|
|
"loss": 0.4632,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18250469863414764,
|
||
|
|
"step": 855,
|
||
|
|
"valid_targets_mean": 11265.1,
|
||
|
|
"valid_targets_min": 3015
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.727941176470588,
|
||
|
|
"grad_norm": 0.26786812942141464,
|
||
|
|
"learning_rate": 1.1605649824063176e-05,
|
||
|
|
"loss": 0.4704,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1485501229763031,
|
||
|
|
"step": 860,
|
||
|
|
"valid_targets_mean": 9213.1,
|
||
|
|
"valid_targets_min": 1661
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.755514705882353,
|
||
|
|
"grad_norm": 0.23312897885245942,
|
||
|
|
"learning_rate": 1.1357625764942095e-05,
|
||
|
|
"loss": 0.4646,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16314777731895447,
|
||
|
|
"step": 865,
|
||
|
|
"valid_targets_mean": 9994.2,
|
||
|
|
"valid_targets_min": 1692
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.783088235294118,
|
||
|
|
"grad_norm": 0.24908363289906799,
|
||
|
|
"learning_rate": 1.1111225372161818e-05,
|
||
|
|
"loss": 0.463,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15313473343849182,
|
||
|
|
"step": 870,
|
||
|
|
"valid_targets_mean": 9034.0,
|
||
|
|
"valid_targets_min": 1750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.810661764705882,
|
||
|
|
"grad_norm": 0.23939462069601722,
|
||
|
|
"learning_rate": 1.0866494937633953e-05,
|
||
|
|
"loss": 0.4616,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14009788632392883,
|
||
|
|
"step": 875,
|
||
|
|
"valid_targets_mean": 8579.7,
|
||
|
|
"valid_targets_min": 1839
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.838235294117647,
|
||
|
|
"grad_norm": 0.23955295963992793,
|
||
|
|
"learning_rate": 1.0623480439530493e-05,
|
||
|
|
"loss": 0.468,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18267551064491272,
|
||
|
|
"step": 880,
|
||
|
|
"valid_targets_mean": 10546.3,
|
||
|
|
"valid_targets_min": 3024
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.865808823529412,
|
||
|
|
"grad_norm": 0.2616170432420212,
|
||
|
|
"learning_rate": 1.038222753364581e-05,
|
||
|
|
"loss": 0.4693,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1501752734184265,
|
||
|
|
"step": 885,
|
||
|
|
"valid_targets_mean": 9260.8,
|
||
|
|
"valid_targets_min": 1575
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.893382352941177,
|
||
|
|
"grad_norm": 0.22421807483190181,
|
||
|
|
"learning_rate": 1.0142781544819158e-05,
|
||
|
|
"loss": 0.4669,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15006357431411743,
|
||
|
|
"step": 890,
|
||
|
|
"valid_targets_mean": 10454.8,
|
||
|
|
"valid_targets_min": 5073
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.920955882352941,
|
||
|
|
"grad_norm": 0.22432441115476565,
|
||
|
|
"learning_rate": 9.905187458419343e-06,
|
||
|
|
"loss": 0.4628,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1583147495985031,
|
||
|
|
"step": 895,
|
||
|
|
"valid_targets_mean": 9921.4,
|
||
|
|
"valid_targets_min": 2207
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.948529411764706,
|
||
|
|
"grad_norm": 0.25000893710548316,
|
||
|
|
"learning_rate": 9.669489911893261e-06,
|
||
|
|
"loss": 0.4679,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16411854326725006,
|
||
|
|
"step": 900,
|
||
|
|
"valid_targets_mean": 9388.2,
|
||
|
|
"valid_targets_min": 3775
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 4.976102941176471,
|
||
|
|
"grad_norm": 0.2608534142049316,
|
||
|
|
"learning_rate": 9.435733186379694e-06,
|
||
|
|
"loss": 0.4591,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14242586493492126,
|
||
|
|
"step": 905,
|
||
|
|
"valid_targets_mean": 9715.4,
|
||
|
|
"valid_targets_min": 2367
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"grad_norm": 0.528080421416379,
|
||
|
|
"learning_rate": 9.2039611983901e-06,
|
||
|
|
"loss": 0.4665,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.45787763595581055,
|
||
|
|
"step": 910,
|
||
|
|
"valid_targets_mean": 9017.6,
|
||
|
|
"valid_targets_min": 1527
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0275735294117645,
|
||
|
|
"grad_norm": 0.25364778323379056,
|
||
|
|
"learning_rate": 8.974217491557916e-06,
|
||
|
|
"loss": 0.4582,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15120220184326172,
|
||
|
|
"step": 915,
|
||
|
|
"valid_targets_mean": 9724.2,
|
||
|
|
"valid_targets_min": 4256
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.055147058823529,
|
||
|
|
"grad_norm": 0.2720756490768129,
|
||
|
|
"learning_rate": 8.746545228457864e-06,
|
||
|
|
"loss": 0.4627,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.18436521291732788,
|
||
|
|
"step": 920,
|
||
|
|
"valid_targets_mean": 10848.2,
|
||
|
|
"valid_targets_min": 2906
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.082720588235294,
|
||
|
|
"grad_norm": 0.2476109752744865,
|
||
|
|
"learning_rate": 8.520987182496916e-06,
|
||
|
|
"loss": 0.4615,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1586351841688156,
|
||
|
|
"step": 925,
|
||
|
|
"valid_targets_mean": 9801.3,
|
||
|
|
"valid_targets_min": 2511
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.110294117647059,
|
||
|
|
"grad_norm": 0.24561606722052834,
|
||
|
|
"learning_rate": 8.297585729878328e-06,
|
||
|
|
"loss": 0.4605,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15383873879909515,
|
||
|
|
"step": 930,
|
||
|
|
"valid_targets_mean": 10230.4,
|
||
|
|
"valid_targets_min": 1542
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.137867647058823,
|
||
|
|
"grad_norm": 0.23155125931202536,
|
||
|
|
"learning_rate": 8.076382841640278e-06,
|
||
|
|
"loss": 0.4595,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15065628290176392,
|
||
|
|
"step": 935,
|
||
|
|
"valid_targets_mean": 9486.9,
|
||
|
|
"valid_targets_min": 2504
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.165441176470588,
|
||
|
|
"grad_norm": 0.2386411883344939,
|
||
|
|
"learning_rate": 7.8574200757707e-06,
|
||
|
|
"loss": 0.4691,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16948550939559937,
|
||
|
|
"step": 940,
|
||
|
|
"valid_targets_mean": 10931.1,
|
||
|
|
"valid_targets_min": 1488
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.193014705882353,
|
||
|
|
"grad_norm": 0.24623043409190853,
|
||
|
|
"learning_rate": 7.640738569399645e-06,
|
||
|
|
"loss": 0.4627,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17989715933799744,
|
||
|
|
"step": 945,
|
||
|
|
"valid_targets_mean": 11805.3,
|
||
|
|
"valid_targets_min": 5070
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.220588235294118,
|
||
|
|
"grad_norm": 0.25645332420887573,
|
||
|
|
"learning_rate": 7.426379031070736e-06,
|
||
|
|
"loss": 0.4653,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14318367838859558,
|
||
|
|
"step": 950,
|
||
|
|
"valid_targets_mean": 9511.3,
|
||
|
|
"valid_targets_min": 2171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.248161764705882,
|
||
|
|
"grad_norm": 0.2509019424889472,
|
||
|
|
"learning_rate": 7.214381733093156e-06,
|
||
|
|
"loss": 0.4623,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14922644197940826,
|
||
|
|
"step": 955,
|
||
|
|
"valid_targets_mean": 9984.3,
|
||
|
|
"valid_targets_min": 3056
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.275735294117647,
|
||
|
|
"grad_norm": 0.22957419096439602,
|
||
|
|
"learning_rate": 7.004786503975552e-06,
|
||
|
|
"loss": 0.464,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15641915798187256,
|
||
|
|
"step": 960,
|
||
|
|
"valid_targets_mean": 10315.0,
|
||
|
|
"valid_targets_min": 4133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.303308823529412,
|
||
|
|
"grad_norm": 0.28021168498690796,
|
||
|
|
"learning_rate": 6.7976327209433855e-06,
|
||
|
|
"loss": 0.4604,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1481720507144928,
|
||
|
|
"step": 965,
|
||
|
|
"valid_targets_mean": 9206.2,
|
||
|
|
"valid_targets_min": 1886
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.330882352941177,
|
||
|
|
"grad_norm": 0.2426926206109474,
|
||
|
|
"learning_rate": 6.592959302541004e-06,
|
||
|
|
"loss": 0.4589,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.173628568649292,
|
||
|
|
"step": 970,
|
||
|
|
"valid_targets_mean": 10906.5,
|
||
|
|
"valid_targets_min": 4457
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.358455882352941,
|
||
|
|
"grad_norm": 0.23348669978255307,
|
||
|
|
"learning_rate": 6.39080470131989e-06,
|
||
|
|
"loss": 0.4609,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16939929127693176,
|
||
|
|
"step": 975,
|
||
|
|
"valid_targets_mean": 10147.1,
|
||
|
|
"valid_targets_min": 1450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.386029411764706,
|
||
|
|
"grad_norm": 0.2413777466766637,
|
||
|
|
"learning_rate": 6.1912068966145145e-06,
|
||
|
|
"loss": 0.4565,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15642428398132324,
|
||
|
|
"step": 980,
|
||
|
|
"valid_targets_mean": 9358.1,
|
||
|
|
"valid_targets_min": 535
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.413602941176471,
|
||
|
|
"grad_norm": 0.2388089637438226,
|
||
|
|
"learning_rate": 5.994203387407036e-06,
|
||
|
|
"loss": 0.4629,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1432351917028427,
|
||
|
|
"step": 985,
|
||
|
|
"valid_targets_mean": 9491.7,
|
||
|
|
"valid_targets_min": 1802
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4411764705882355,
|
||
|
|
"grad_norm": 0.2190404554471656,
|
||
|
|
"learning_rate": 5.7998311852822406e-06,
|
||
|
|
"loss": 0.4633,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16985687613487244,
|
||
|
|
"step": 990,
|
||
|
|
"valid_targets_mean": 10872.9,
|
||
|
|
"valid_targets_min": 2969
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.46875,
|
||
|
|
"grad_norm": 0.2238371695459484,
|
||
|
|
"learning_rate": 5.608126807474145e-06,
|
||
|
|
"loss": 0.4622,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15325827896595,
|
||
|
|
"step": 995,
|
||
|
|
"valid_targets_mean": 10413.0,
|
||
|
|
"valid_targets_min": 2987
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.4963235294117645,
|
||
|
|
"grad_norm": 0.23551352199294445,
|
||
|
|
"learning_rate": 5.419126270005317e-06,
|
||
|
|
"loss": 0.4626,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14396341145038605,
|
||
|
|
"step": 1000,
|
||
|
|
"valid_targets_mean": 9435.3,
|
||
|
|
"valid_targets_min": 2314
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.523897058823529,
|
||
|
|
"grad_norm": 0.24552735754578195,
|
||
|
|
"learning_rate": 5.23286508092051e-06,
|
||
|
|
"loss": 0.4557,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15775898098945618,
|
||
|
|
"step": 1005,
|
||
|
|
"valid_targets_mean": 9846.8,
|
||
|
|
"valid_targets_min": 2897
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.551470588235294,
|
||
|
|
"grad_norm": 0.23811014424649815,
|
||
|
|
"learning_rate": 5.049378233615652e-06,
|
||
|
|
"loss": 0.4656,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15497168898582458,
|
||
|
|
"step": 1010,
|
||
|
|
"valid_targets_mean": 9349.0,
|
||
|
|
"valid_targets_min": 4354
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.579044117647059,
|
||
|
|
"grad_norm": 0.22012023372507486,
|
||
|
|
"learning_rate": 4.868700200263521e-06,
|
||
|
|
"loss": 0.4624,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1530693769454956,
|
||
|
|
"step": 1015,
|
||
|
|
"valid_targets_mean": 10066.7,
|
||
|
|
"valid_targets_min": 3738
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.606617647058823,
|
||
|
|
"grad_norm": 0.2384744886144643,
|
||
|
|
"learning_rate": 4.690864925337404e-06,
|
||
|
|
"loss": 0.459,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14344365894794464,
|
||
|
|
"step": 1020,
|
||
|
|
"valid_targets_mean": 9216.1,
|
||
|
|
"valid_targets_min": 2135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.634191176470588,
|
||
|
|
"grad_norm": 0.22620664113381828,
|
||
|
|
"learning_rate": 4.515905819233828e-06,
|
||
|
|
"loss": 0.4587,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15465840697288513,
|
||
|
|
"step": 1025,
|
||
|
|
"valid_targets_mean": 9175.2,
|
||
|
|
"valid_targets_min": 2058
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.661764705882353,
|
||
|
|
"grad_norm": 0.2308131065264548,
|
||
|
|
"learning_rate": 4.343855751995645e-06,
|
||
|
|
"loss": 0.463,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14798535406589508,
|
||
|
|
"step": 1030,
|
||
|
|
"valid_targets_mean": 9069.3,
|
||
|
|
"valid_targets_min": 1487
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.689338235294118,
|
||
|
|
"grad_norm": 0.20210750386819545,
|
||
|
|
"learning_rate": 4.174747047136707e-06,
|
||
|
|
"loss": 0.4629,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13314756751060486,
|
||
|
|
"step": 1035,
|
||
|
|
"valid_targets_mean": 8539.2,
|
||
|
|
"valid_targets_min": 2094
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.716911764705882,
|
||
|
|
"grad_norm": 0.2182748594468666,
|
||
|
|
"learning_rate": 4.008611475569082e-06,
|
||
|
|
"loss": 0.4623,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1582449972629547,
|
||
|
|
"step": 1040,
|
||
|
|
"valid_targets_mean": 10496.3,
|
||
|
|
"valid_targets_min": 1687
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.744485294117647,
|
||
|
|
"grad_norm": 0.22255571373558947,
|
||
|
|
"learning_rate": 3.845480249634226e-06,
|
||
|
|
"loss": 0.4664,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15279759466648102,
|
||
|
|
"step": 1045,
|
||
|
|
"valid_targets_mean": 9399.6,
|
||
|
|
"valid_targets_min": 2004
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.772058823529412,
|
||
|
|
"grad_norm": 0.23780299660682128,
|
||
|
|
"learning_rate": 3.685384017239013e-06,
|
||
|
|
"loss": 0.4563,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1619456708431244,
|
||
|
|
"step": 1050,
|
||
|
|
"valid_targets_mean": 10361.6,
|
||
|
|
"valid_targets_min": 2616
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.799632352941177,
|
||
|
|
"grad_norm": 0.2149753999385376,
|
||
|
|
"learning_rate": 3.5283528560978163e-06,
|
||
|
|
"loss": 0.4606,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15598419308662415,
|
||
|
|
"step": 1055,
|
||
|
|
"valid_targets_mean": 10178.3,
|
||
|
|
"valid_targets_min": 1567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.827205882352941,
|
||
|
|
"grad_norm": 0.23604543135606404,
|
||
|
|
"learning_rate": 3.3744162680817526e-06,
|
||
|
|
"loss": 0.463,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15586526691913605,
|
||
|
|
"step": 1060,
|
||
|
|
"valid_targets_mean": 10143.1,
|
||
|
|
"valid_targets_min": 2785
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.854779411764706,
|
||
|
|
"grad_norm": 0.21782882942010112,
|
||
|
|
"learning_rate": 3.2236031736760775e-06,
|
||
|
|
"loss": 0.4628,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1613750457763672,
|
||
|
|
"step": 1065,
|
||
|
|
"valid_targets_mean": 10578.9,
|
||
|
|
"valid_targets_min": 3416
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.882352941176471,
|
||
|
|
"grad_norm": 0.24360429896673724,
|
||
|
|
"learning_rate": 3.075941906546789e-06,
|
||
|
|
"loss": 0.4643,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17128227651119232,
|
||
|
|
"step": 1070,
|
||
|
|
"valid_targets_mean": 10810.2,
|
||
|
|
"valid_targets_min": 4489
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.9099264705882355,
|
||
|
|
"grad_norm": 0.21853281708343872,
|
||
|
|
"learning_rate": 2.9314602082175624e-06,
|
||
|
|
"loss": 0.4634,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1510457843542099,
|
||
|
|
"step": 1075,
|
||
|
|
"valid_targets_mean": 9562.4,
|
||
|
|
"valid_targets_min": 2854
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.9375,
|
||
|
|
"grad_norm": 0.21718471329556974,
|
||
|
|
"learning_rate": 2.790185222857804e-06,
|
||
|
|
"loss": 0.4581,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14728333055973053,
|
||
|
|
"step": 1080,
|
||
|
|
"valid_targets_mean": 9295.6,
|
||
|
|
"valid_targets_min": 1896
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.9650735294117645,
|
||
|
|
"grad_norm": 0.21759757406424912,
|
||
|
|
"learning_rate": 2.6521434921830593e-06,
|
||
|
|
"loss": 0.4602,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16067388653755188,
|
||
|
|
"step": 1085,
|
||
|
|
"valid_targets_mean": 9997.4,
|
||
|
|
"valid_targets_min": 1567
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.992647058823529,
|
||
|
|
"grad_norm": 0.2372799465315037,
|
||
|
|
"learning_rate": 2.517360950468519e-06,
|
||
|
|
"loss": 0.4564,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14969328045845032,
|
||
|
|
"step": 1090,
|
||
|
|
"valid_targets_mean": 9974.2,
|
||
|
|
"valid_targets_min": 2365
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.016544117647059,
|
||
|
|
"grad_norm": 0.22070664266477452,
|
||
|
|
"learning_rate": 2.3858629196766846e-06,
|
||
|
|
"loss": 0.4622,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14457306265830994,
|
||
|
|
"step": 1095,
|
||
|
|
"valid_targets_mean": 8829.4,
|
||
|
|
"valid_targets_min": 2041
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.044117647058823,
|
||
|
|
"grad_norm": 0.23770514220168587,
|
||
|
|
"learning_rate": 2.2576741047000605e-06,
|
||
|
|
"loss": 0.4634,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1596033126115799,
|
||
|
|
"step": 1100,
|
||
|
|
"valid_targets_mean": 10972.9,
|
||
|
|
"valid_targets_min": 5689
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.071691176470588,
|
||
|
|
"grad_norm": 0.2380064578876329,
|
||
|
|
"learning_rate": 2.1328185887197872e-06,
|
||
|
|
"loss": 0.4583,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16930876672267914,
|
||
|
|
"step": 1105,
|
||
|
|
"valid_targets_mean": 9747.8,
|
||
|
|
"valid_targets_min": 2320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.099264705882353,
|
||
|
|
"grad_norm": 0.22187661591130384,
|
||
|
|
"learning_rate": 2.011319828681049e-06,
|
||
|
|
"loss": 0.4545,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1560242772102356,
|
||
|
|
"step": 1110,
|
||
|
|
"valid_targets_mean": 9941.7,
|
||
|
|
"valid_targets_min": 1490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.126838235294118,
|
||
|
|
"grad_norm": 0.22871735612785934,
|
||
|
|
"learning_rate": 1.8932006508861866e-06,
|
||
|
|
"loss": 0.4582,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13863195478916168,
|
||
|
|
"step": 1115,
|
||
|
|
"valid_targets_mean": 8947.5,
|
||
|
|
"valid_targets_min": 1633
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.154411764705882,
|
||
|
|
"grad_norm": 0.220041969177945,
|
||
|
|
"learning_rate": 1.7784832467062129e-06,
|
||
|
|
"loss": 0.4631,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16414231061935425,
|
||
|
|
"step": 1120,
|
||
|
|
"valid_targets_mean": 10703.4,
|
||
|
|
"valid_targets_min": 1974
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.181985294117647,
|
||
|
|
"grad_norm": 0.22496390430322033,
|
||
|
|
"learning_rate": 1.6671891684117048e-06,
|
||
|
|
"loss": 0.4559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16701489686965942,
|
||
|
|
"step": 1125,
|
||
|
|
"valid_targets_mean": 11036.8,
|
||
|
|
"valid_targets_min": 1223
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.209558823529412,
|
||
|
|
"grad_norm": 0.23390335972144527,
|
||
|
|
"learning_rate": 1.55933932512369e-06,
|
||
|
|
"loss": 0.4602,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15574294328689575,
|
||
|
|
"step": 1130,
|
||
|
|
"valid_targets_mean": 9363.7,
|
||
|
|
"valid_targets_min": 2356
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.237132352941177,
|
||
|
|
"grad_norm": 0.21186034476089416,
|
||
|
|
"learning_rate": 1.4549539788853984e-06,
|
||
|
|
"loss": 0.4616,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.167169451713562,
|
||
|
|
"step": 1135,
|
||
|
|
"valid_targets_mean": 9769.6,
|
||
|
|
"valid_targets_min": 1886
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.264705882352941,
|
||
|
|
"grad_norm": 0.24246484888375872,
|
||
|
|
"learning_rate": 1.3540527408555915e-06,
|
||
|
|
"loss": 0.4573,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13978439569473267,
|
||
|
|
"step": 1140,
|
||
|
|
"valid_targets_mean": 9072.4,
|
||
|
|
"valid_targets_min": 1851
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.292279411764706,
|
||
|
|
"grad_norm": 0.20852258029588774,
|
||
|
|
"learning_rate": 1.2566545676241494e-06,
|
||
|
|
"loss": 0.4636,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16217908263206482,
|
||
|
|
"step": 1145,
|
||
|
|
"valid_targets_mean": 10326.9,
|
||
|
|
"valid_targets_min": 2180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.319852941176471,
|
||
|
|
"grad_norm": 0.259220830437003,
|
||
|
|
"learning_rate": 1.1627777576506306e-06,
|
||
|
|
"loss": 0.4647,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13484236598014832,
|
||
|
|
"step": 1150,
|
||
|
|
"valid_targets_mean": 8036.9,
|
||
|
|
"valid_targets_min": 2414
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.3474264705882355,
|
||
|
|
"grad_norm": 0.23165660017295853,
|
||
|
|
"learning_rate": 1.0724399478265312e-06,
|
||
|
|
"loss": 0.4619,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15904231369495392,
|
||
|
|
"step": 1155,
|
||
|
|
"valid_targets_mean": 10136.1,
|
||
|
|
"valid_targets_min": 3152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.375,
|
||
|
|
"grad_norm": 0.21620185766223732,
|
||
|
|
"learning_rate": 9.85658110161747e-07,
|
||
|
|
"loss": 0.463,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15874940156936646,
|
||
|
|
"step": 1160,
|
||
|
|
"valid_targets_mean": 10696.2,
|
||
|
|
"valid_targets_min": 1469
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.4025735294117645,
|
||
|
|
"grad_norm": 0.2266371932340415,
|
||
|
|
"learning_rate": 9.02448548596031e-07,
|
||
|
|
"loss": 0.4628,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15737676620483398,
|
||
|
|
"step": 1165,
|
||
|
|
"valid_targets_mean": 10155.7,
|
||
|
|
"valid_targets_min": 1265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.430147058823529,
|
||
|
|
"grad_norm": 0.22665087403909726,
|
||
|
|
"learning_rate": 8.228268959359086e-07,
|
||
|
|
"loss": 0.4626,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15128636360168457,
|
||
|
|
"step": 1170,
|
||
|
|
"valid_targets_mean": 9104.2,
|
||
|
|
"valid_targets_min": 1738
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.457720588235294,
|
||
|
|
"grad_norm": 0.279337484358945,
|
||
|
|
"learning_rate": 7.468081109177028e-07,
|
||
|
|
"loss": 0.4574,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.13439278304576874,
|
||
|
|
"step": 1175,
|
||
|
|
"valid_targets_mean": 8199.8,
|
||
|
|
"valid_targets_min": 1505
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.485294117647059,
|
||
|
|
"grad_norm": 0.22106817456760308,
|
||
|
|
"learning_rate": 6.744064753972068e-07,
|
||
|
|
"loss": 0.4646,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15889695286750793,
|
||
|
|
"step": 1180,
|
||
|
|
"valid_targets_mean": 9808.5,
|
||
|
|
"valid_targets_min": 2024
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.512867647058823,
|
||
|
|
"grad_norm": 0.20952794028596322,
|
||
|
|
"learning_rate": 6.056355916665024e-07,
|
||
|
|
"loss": 0.4559,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16096243262290955,
|
||
|
|
"step": 1185,
|
||
|
|
"valid_targets_mean": 10794.8,
|
||
|
|
"valid_targets_min": 3436
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.540441176470588,
|
||
|
|
"grad_norm": 0.25549266747636473,
|
||
|
|
"learning_rate": 5.405083798984567e-07,
|
||
|
|
"loss": 0.4541,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1415504813194275,
|
||
|
|
"step": 1190,
|
||
|
|
"valid_targets_mean": 8488.8,
|
||
|
|
"valid_targets_min": 1557
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.568014705882353,
|
||
|
|
"grad_norm": 0.21576881708453866,
|
||
|
|
"learning_rate": 4.790370757193907e-07,
|
||
|
|
"loss": 0.4566,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14048513770103455,
|
||
|
|
"step": 1195,
|
||
|
|
"valid_targets_mean": 8573.9,
|
||
|
|
"valid_targets_min": 1413
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.595588235294118,
|
||
|
|
"grad_norm": 0.21385907663265988,
|
||
|
|
"learning_rate": 4.212332279103204e-07,
|
||
|
|
"loss": 0.4538,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14507344365119934,
|
||
|
|
"step": 1200,
|
||
|
|
"valid_targets_mean": 8611.7,
|
||
|
|
"valid_targets_min": 1369
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.623161764705882,
|
||
|
|
"grad_norm": 0.27787320041061075,
|
||
|
|
"learning_rate": 3.671076962372655e-07,
|
||
|
|
"loss": 0.4598,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17089204490184784,
|
||
|
|
"step": 1205,
|
||
|
|
"valid_targets_mean": 10808.4,
|
||
|
|
"valid_targets_min": 2900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.650735294117647,
|
||
|
|
"grad_norm": 0.21747531396757017,
|
||
|
|
"learning_rate": 3.1667064941099724e-07,
|
||
|
|
"loss": 0.4568,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14115549623966217,
|
||
|
|
"step": 1210,
|
||
|
|
"valid_targets_mean": 8620.9,
|
||
|
|
"valid_targets_min": 2139
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.678308823529412,
|
||
|
|
"grad_norm": 0.23882168189925346,
|
||
|
|
"learning_rate": 2.699315631766064e-07,
|
||
|
|
"loss": 0.4632,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1577366292476654,
|
||
|
|
"step": 1215,
|
||
|
|
"valid_targets_mean": 10010.7,
|
||
|
|
"valid_targets_min": 2525
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.705882352941177,
|
||
|
|
"grad_norm": 0.21158133975405694,
|
||
|
|
"learning_rate": 2.26899218533283e-07,
|
||
|
|
"loss": 0.46,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15490460395812988,
|
||
|
|
"step": 1220,
|
||
|
|
"valid_targets_mean": 10206.4,
|
||
|
|
"valid_targets_min": 1948
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.733455882352941,
|
||
|
|
"grad_norm": 0.2068070148831523,
|
||
|
|
"learning_rate": 1.8758170008459142e-07,
|
||
|
|
"loss": 0.4624,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.17629992961883545,
|
||
|
|
"step": 1225,
|
||
|
|
"valid_targets_mean": 11098.0,
|
||
|
|
"valid_targets_min": 2855
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.761029411764706,
|
||
|
|
"grad_norm": 0.276570316072206,
|
||
|
|
"learning_rate": 1.5198639451960095e-07,
|
||
|
|
"loss": 0.4548,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1544700562953949,
|
||
|
|
"step": 1230,
|
||
|
|
"valid_targets_mean": 10218.4,
|
||
|
|
"valid_targets_min": 1690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.788602941176471,
|
||
|
|
"grad_norm": 0.24887539900389968,
|
||
|
|
"learning_rate": 1.201199892251337e-07,
|
||
|
|
"loss": 0.4555,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16659650206565857,
|
||
|
|
"step": 1235,
|
||
|
|
"valid_targets_mean": 11563.8,
|
||
|
|
"valid_targets_min": 1522
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.8161764705882355,
|
||
|
|
"grad_norm": 0.2186914252195666,
|
||
|
|
"learning_rate": 9.198847102937614e-08,
|
||
|
|
"loss": 0.4552,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15176278352737427,
|
||
|
|
"step": 1240,
|
||
|
|
"valid_targets_mean": 9547.0,
|
||
|
|
"valid_targets_min": 2825
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.84375,
|
||
|
|
"grad_norm": 0.20820326483482113,
|
||
|
|
"learning_rate": 6.759712507711902e-08,
|
||
|
|
"loss": 0.4622,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15682631731033325,
|
||
|
|
"step": 1245,
|
||
|
|
"valid_targets_mean": 9983.1,
|
||
|
|
"valid_targets_min": 2230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.8713235294117645,
|
||
|
|
"grad_norm": 0.22180946034862845,
|
||
|
|
"learning_rate": 4.695053383683812e-08,
|
||
|
|
"loss": 0.457,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14503879845142365,
|
||
|
|
"step": 1250,
|
||
|
|
"valid_targets_mean": 8904.7,
|
||
|
|
"valid_targets_min": 1080
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.898897058823529,
|
||
|
|
"grad_norm": 0.2383751709023781,
|
||
|
|
"learning_rate": 3.0052576239749666e-08,
|
||
|
|
"loss": 0.4554,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.1317831426858902,
|
||
|
|
"step": 1255,
|
||
|
|
"valid_targets_mean": 8981.2,
|
||
|
|
"valid_targets_min": 1992
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.926470588235294,
|
||
|
|
"grad_norm": 0.2107262711334655,
|
||
|
|
"learning_rate": 1.6906426951086573e-08,
|
||
|
|
"loss": 0.4581,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.14984363317489624,
|
||
|
|
"step": 1260,
|
||
|
|
"valid_targets_mean": 10053.0,
|
||
|
|
"valid_targets_min": 3060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.954044117647059,
|
||
|
|
"grad_norm": 0.2623126377291314,
|
||
|
|
"learning_rate": 7.514555773648901e-09,
|
||
|
|
"loss": 0.4614,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.16520237922668457,
|
||
|
|
"step": 1265,
|
||
|
|
"valid_targets_mean": 10900.3,
|
||
|
|
"valid_targets_min": 3734
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 6.981617647058823,
|
||
|
|
"grad_norm": 0.2030352524829392,
|
||
|
|
"learning_rate": 1.8787271838083263e-09,
|
||
|
|
"loss": 0.4614,
|
||
|
|
"loss_nan_ranks": 0,
|
||
|
|
"loss_rank_avg": 0.15681001543998718,
|
||
|
|
"step": 1270,
|
||
|
|
"valid_targets_mean": 10194.2,
|
||
|
|
"valid_targets_min": 4309
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 7.0,
|
||
|
|
"step": 1274,
|
||
|
|
"total_flos": 5.319956987025293e+18,
|
||
|
|
"train_loss": 0.0,
|
||
|
|
"train_runtime": 1.2382,
|
||
|
|
"train_samples_per_second": 98264.932,
|
||
|
|
"train_steps_per_second": 1028.893
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 1274,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 7,
|
||
|
|
"save_steps": 300,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 5.319956987025293e+18,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|