Model: laion/openthoughts-4-code-qwen3-32b-annotated-7k_qwen3-1.7B_10k Source: Original Platform
16537 lines
462 KiB
JSON
16537 lines
462 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 8.0,
|
|
"eval_steps": 500,
|
|
"global_step": 7496,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.005336179295624333,
|
|
"grad_norm": 12.46944717625873,
|
|
"learning_rate": 1.0666666666666667e-06,
|
|
"loss": 0.8061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8061428070068359,
|
|
"step": 5,
|
|
"valid_targets_mean": 28904.8,
|
|
"valid_targets_min": 20949
|
|
},
|
|
{
|
|
"epoch": 0.010672358591248666,
|
|
"grad_norm": 10.854200136832457,
|
|
"learning_rate": 2.4000000000000003e-06,
|
|
"loss": 0.7944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7890393137931824,
|
|
"step": 10,
|
|
"valid_targets_mean": 28786.1,
|
|
"valid_targets_min": 20945
|
|
},
|
|
{
|
|
"epoch": 0.016008537886873,
|
|
"grad_norm": 6.848611631904356,
|
|
"learning_rate": 3.7333333333333337e-06,
|
|
"loss": 0.7523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7329467535018921,
|
|
"step": 15,
|
|
"valid_targets_mean": 28843.0,
|
|
"valid_targets_min": 20873
|
|
},
|
|
{
|
|
"epoch": 0.021344717182497332,
|
|
"grad_norm": 2.5190437684250737,
|
|
"learning_rate": 5.066666666666667e-06,
|
|
"loss": 0.7026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6821247935295105,
|
|
"step": 20,
|
|
"valid_targets_mean": 28758.3,
|
|
"valid_targets_min": 19258
|
|
},
|
|
{
|
|
"epoch": 0.026680896478121666,
|
|
"grad_norm": 1.447362426625492,
|
|
"learning_rate": 6.4000000000000006e-06,
|
|
"loss": 0.6692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.665269672870636,
|
|
"step": 25,
|
|
"valid_targets_mean": 28746.4,
|
|
"valid_targets_min": 20839
|
|
},
|
|
{
|
|
"epoch": 0.032017075773746,
|
|
"grad_norm": 1.3783494582439455,
|
|
"learning_rate": 7.733333333333334e-06,
|
|
"loss": 0.6579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6507786512374878,
|
|
"step": 30,
|
|
"valid_targets_mean": 28955.7,
|
|
"valid_targets_min": 22126
|
|
},
|
|
{
|
|
"epoch": 0.03735325506937033,
|
|
"grad_norm": 0.5967416140332733,
|
|
"learning_rate": 9.066666666666667e-06,
|
|
"loss": 0.6443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6362227201461792,
|
|
"step": 35,
|
|
"valid_targets_mean": 28837.7,
|
|
"valid_targets_min": 22221
|
|
},
|
|
{
|
|
"epoch": 0.042689434364994665,
|
|
"grad_norm": 0.554654624115749,
|
|
"learning_rate": 1.04e-05,
|
|
"loss": 0.6329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.631878137588501,
|
|
"step": 40,
|
|
"valid_targets_mean": 28880.9,
|
|
"valid_targets_min": 21716
|
|
},
|
|
{
|
|
"epoch": 0.048025613660619,
|
|
"grad_norm": 0.40179224460616403,
|
|
"learning_rate": 1.1733333333333333e-05,
|
|
"loss": 0.6266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6223902702331543,
|
|
"step": 45,
|
|
"valid_targets_mean": 28925.6,
|
|
"valid_targets_min": 19265
|
|
},
|
|
{
|
|
"epoch": 0.05336179295624333,
|
|
"grad_norm": 0.3137447532636136,
|
|
"learning_rate": 1.3066666666666666e-05,
|
|
"loss": 0.6208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6185448169708252,
|
|
"step": 50,
|
|
"valid_targets_mean": 28986.4,
|
|
"valid_targets_min": 21522
|
|
},
|
|
{
|
|
"epoch": 0.05869797225186766,
|
|
"grad_norm": 0.24068546725659087,
|
|
"learning_rate": 1.44e-05,
|
|
"loss": 0.6136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6145120859146118,
|
|
"step": 55,
|
|
"valid_targets_mean": 28725.1,
|
|
"valid_targets_min": 19987
|
|
},
|
|
{
|
|
"epoch": 0.064034151547492,
|
|
"grad_norm": 0.22089166210741729,
|
|
"learning_rate": 1.5733333333333334e-05,
|
|
"loss": 0.6083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6090884208679199,
|
|
"step": 60,
|
|
"valid_targets_mean": 28757.6,
|
|
"valid_targets_min": 20837
|
|
},
|
|
{
|
|
"epoch": 0.06937033084311633,
|
|
"grad_norm": 0.21982402489152278,
|
|
"learning_rate": 1.7066666666666667e-05,
|
|
"loss": 0.607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6048812866210938,
|
|
"step": 65,
|
|
"valid_targets_mean": 28755.8,
|
|
"valid_targets_min": 17393
|
|
},
|
|
{
|
|
"epoch": 0.07470651013874066,
|
|
"grad_norm": 0.2081277489820844,
|
|
"learning_rate": 1.84e-05,
|
|
"loss": 0.6034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5989037752151489,
|
|
"step": 70,
|
|
"valid_targets_mean": 28790.1,
|
|
"valid_targets_min": 19897
|
|
},
|
|
{
|
|
"epoch": 0.08004268943436499,
|
|
"grad_norm": 0.22176638943335583,
|
|
"learning_rate": 1.9733333333333333e-05,
|
|
"loss": 0.5973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5971338748931885,
|
|
"step": 75,
|
|
"valid_targets_mean": 28797.2,
|
|
"valid_targets_min": 20862
|
|
},
|
|
{
|
|
"epoch": 0.08537886872998933,
|
|
"grad_norm": 0.21267940944215732,
|
|
"learning_rate": 2.106666666666667e-05,
|
|
"loss": 0.5912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.587620735168457,
|
|
"step": 80,
|
|
"valid_targets_mean": 28717.8,
|
|
"valid_targets_min": 18978
|
|
},
|
|
{
|
|
"epoch": 0.09071504802561366,
|
|
"grad_norm": 0.2359193466407616,
|
|
"learning_rate": 2.2400000000000002e-05,
|
|
"loss": 0.5886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.583030641078949,
|
|
"step": 85,
|
|
"valid_targets_mean": 28823.8,
|
|
"valid_targets_min": 17944
|
|
},
|
|
{
|
|
"epoch": 0.096051227321238,
|
|
"grad_norm": 0.2670343955859104,
|
|
"learning_rate": 2.3733333333333335e-05,
|
|
"loss": 0.5903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5849412083625793,
|
|
"step": 90,
|
|
"valid_targets_mean": 28909.0,
|
|
"valid_targets_min": 21779
|
|
},
|
|
{
|
|
"epoch": 0.10138740661686232,
|
|
"grad_norm": 0.2670701546160268,
|
|
"learning_rate": 2.5066666666666665e-05,
|
|
"loss": 0.5842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5862775444984436,
|
|
"step": 95,
|
|
"valid_targets_mean": 28871.7,
|
|
"valid_targets_min": 22223
|
|
},
|
|
{
|
|
"epoch": 0.10672358591248667,
|
|
"grad_norm": 0.36156840702341764,
|
|
"learning_rate": 2.64e-05,
|
|
"loss": 0.5839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5871520042419434,
|
|
"step": 100,
|
|
"valid_targets_mean": 28859.4,
|
|
"valid_targets_min": 19968
|
|
},
|
|
{
|
|
"epoch": 0.11205976520811099,
|
|
"grad_norm": 0.30448811630120975,
|
|
"learning_rate": 2.7733333333333334e-05,
|
|
"loss": 0.5835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5811035633087158,
|
|
"step": 105,
|
|
"valid_targets_mean": 28989.8,
|
|
"valid_targets_min": 18150
|
|
},
|
|
{
|
|
"epoch": 0.11739594450373532,
|
|
"grad_norm": 0.37432798390739574,
|
|
"learning_rate": 2.906666666666667e-05,
|
|
"loss": 0.5798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5788925886154175,
|
|
"step": 110,
|
|
"valid_targets_mean": 28699.6,
|
|
"valid_targets_min": 21089
|
|
},
|
|
{
|
|
"epoch": 0.12273212379935966,
|
|
"grad_norm": 0.32787186906252047,
|
|
"learning_rate": 3.04e-05,
|
|
"loss": 0.5784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5760443210601807,
|
|
"step": 115,
|
|
"valid_targets_mean": 28889.4,
|
|
"valid_targets_min": 21459
|
|
},
|
|
{
|
|
"epoch": 0.128068303094984,
|
|
"grad_norm": 0.270138890246249,
|
|
"learning_rate": 3.173333333333334e-05,
|
|
"loss": 0.5753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5727493166923523,
|
|
"step": 120,
|
|
"valid_targets_mean": 28960.6,
|
|
"valid_targets_min": 22036
|
|
},
|
|
{
|
|
"epoch": 0.13340448239060831,
|
|
"grad_norm": 0.5654127135981639,
|
|
"learning_rate": 3.3066666666666666e-05,
|
|
"loss": 0.5753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5761654376983643,
|
|
"step": 125,
|
|
"valid_targets_mean": 28920.7,
|
|
"valid_targets_min": 22329
|
|
},
|
|
{
|
|
"epoch": 0.13874066168623267,
|
|
"grad_norm": 0.5017381861949305,
|
|
"learning_rate": 3.4399999999999996e-05,
|
|
"loss": 0.5761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5769978761672974,
|
|
"step": 130,
|
|
"valid_targets_mean": 28861.9,
|
|
"valid_targets_min": 16348
|
|
},
|
|
{
|
|
"epoch": 0.144076840981857,
|
|
"grad_norm": 0.6326271596195411,
|
|
"learning_rate": 3.573333333333333e-05,
|
|
"loss": 0.576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5722428560256958,
|
|
"step": 135,
|
|
"valid_targets_mean": 28751.1,
|
|
"valid_targets_min": 21044
|
|
},
|
|
{
|
|
"epoch": 0.14941302027748132,
|
|
"grad_norm": 0.4278862899999183,
|
|
"learning_rate": 3.706666666666667e-05,
|
|
"loss": 0.5715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5662323236465454,
|
|
"step": 140,
|
|
"valid_targets_mean": 28761.6,
|
|
"valid_targets_min": 21030
|
|
},
|
|
{
|
|
"epoch": 0.15474919957310565,
|
|
"grad_norm": 0.6394771886602794,
|
|
"learning_rate": 3.8400000000000005e-05,
|
|
"loss": 0.5701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5686701536178589,
|
|
"step": 145,
|
|
"valid_targets_mean": 28730.7,
|
|
"valid_targets_min": 20582
|
|
},
|
|
{
|
|
"epoch": 0.16008537886872998,
|
|
"grad_norm": 0.48399755814562095,
|
|
"learning_rate": 3.9733333333333335e-05,
|
|
"loss": 0.5685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5691660642623901,
|
|
"step": 150,
|
|
"valid_targets_mean": 28573.6,
|
|
"valid_targets_min": 19330
|
|
},
|
|
{
|
|
"epoch": 0.16542155816435433,
|
|
"grad_norm": 0.661411567386734,
|
|
"learning_rate": 4.106666666666667e-05,
|
|
"loss": 0.5692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5699338912963867,
|
|
"step": 155,
|
|
"valid_targets_mean": 28886.6,
|
|
"valid_targets_min": 21972
|
|
},
|
|
{
|
|
"epoch": 0.17075773745997866,
|
|
"grad_norm": 0.5829717337815095,
|
|
"learning_rate": 4.24e-05,
|
|
"loss": 0.5663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5658145546913147,
|
|
"step": 160,
|
|
"valid_targets_mean": 28911.6,
|
|
"valid_targets_min": 22554
|
|
},
|
|
{
|
|
"epoch": 0.17609391675560299,
|
|
"grad_norm": 0.6636178616387889,
|
|
"learning_rate": 4.373333333333334e-05,
|
|
"loss": 0.5681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5657886862754822,
|
|
"step": 165,
|
|
"valid_targets_mean": 28936.7,
|
|
"valid_targets_min": 22572
|
|
},
|
|
{
|
|
"epoch": 0.1814300960512273,
|
|
"grad_norm": 0.613994440314584,
|
|
"learning_rate": 4.5066666666666667e-05,
|
|
"loss": 0.5683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.56809401512146,
|
|
"step": 170,
|
|
"valid_targets_mean": 28852.1,
|
|
"valid_targets_min": 20877
|
|
},
|
|
{
|
|
"epoch": 0.18676627534685167,
|
|
"grad_norm": 0.6899646348527206,
|
|
"learning_rate": 4.64e-05,
|
|
"loss": 0.5667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5632824897766113,
|
|
"step": 175,
|
|
"valid_targets_mean": 28809.7,
|
|
"valid_targets_min": 21590
|
|
},
|
|
{
|
|
"epoch": 0.192102454642476,
|
|
"grad_norm": 0.4873629937543719,
|
|
"learning_rate": 4.773333333333333e-05,
|
|
"loss": 0.5643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.565109133720398,
|
|
"step": 180,
|
|
"valid_targets_mean": 28640.0,
|
|
"valid_targets_min": 17506
|
|
},
|
|
{
|
|
"epoch": 0.19743863393810032,
|
|
"grad_norm": 0.5777759535879822,
|
|
"learning_rate": 4.906666666666667e-05,
|
|
"loss": 0.5669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5687307119369507,
|
|
"step": 185,
|
|
"valid_targets_mean": 29064.5,
|
|
"valid_targets_min": 24174
|
|
},
|
|
{
|
|
"epoch": 0.20277481323372465,
|
|
"grad_norm": 0.6884108094887561,
|
|
"learning_rate": 5.0400000000000005e-05,
|
|
"loss": 0.5667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5649628043174744,
|
|
"step": 190,
|
|
"valid_targets_mean": 28915.0,
|
|
"valid_targets_min": 22022
|
|
},
|
|
{
|
|
"epoch": 0.20811099252934898,
|
|
"grad_norm": 0.6017305517883439,
|
|
"learning_rate": 5.1733333333333335e-05,
|
|
"loss": 0.5628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5632403492927551,
|
|
"step": 195,
|
|
"valid_targets_mean": 28751.2,
|
|
"valid_targets_min": 19909
|
|
},
|
|
{
|
|
"epoch": 0.21344717182497333,
|
|
"grad_norm": 0.6802658164600427,
|
|
"learning_rate": 5.3066666666666665e-05,
|
|
"loss": 0.5646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5695996284484863,
|
|
"step": 200,
|
|
"valid_targets_mean": 28698.4,
|
|
"valid_targets_min": 21957
|
|
},
|
|
{
|
|
"epoch": 0.21878335112059766,
|
|
"grad_norm": 0.4538292008860434,
|
|
"learning_rate": 5.440000000000001e-05,
|
|
"loss": 0.5618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5593487024307251,
|
|
"step": 205,
|
|
"valid_targets_mean": 28750.0,
|
|
"valid_targets_min": 17693
|
|
},
|
|
{
|
|
"epoch": 0.22411953041622198,
|
|
"grad_norm": 0.641577142782705,
|
|
"learning_rate": 5.573333333333334e-05,
|
|
"loss": 0.5645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5658643245697021,
|
|
"step": 210,
|
|
"valid_targets_mean": 28656.9,
|
|
"valid_targets_min": 17376
|
|
},
|
|
{
|
|
"epoch": 0.2294557097118463,
|
|
"grad_norm": 0.5170054885262663,
|
|
"learning_rate": 5.706666666666667e-05,
|
|
"loss": 0.5614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5651887655258179,
|
|
"step": 215,
|
|
"valid_targets_mean": 28857.5,
|
|
"valid_targets_min": 23359
|
|
},
|
|
{
|
|
"epoch": 0.23479188900747064,
|
|
"grad_norm": 0.6177322178483213,
|
|
"learning_rate": 5.8399999999999997e-05,
|
|
"loss": 0.5601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5584475994110107,
|
|
"step": 220,
|
|
"valid_targets_mean": 28901.5,
|
|
"valid_targets_min": 22575
|
|
},
|
|
{
|
|
"epoch": 0.240128068303095,
|
|
"grad_norm": 0.5026653012436633,
|
|
"learning_rate": 5.973333333333334e-05,
|
|
"loss": 0.5599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5588065385818481,
|
|
"step": 225,
|
|
"valid_targets_mean": 28699.3,
|
|
"valid_targets_min": 21859
|
|
},
|
|
{
|
|
"epoch": 0.24546424759871932,
|
|
"grad_norm": 0.4995892379669603,
|
|
"learning_rate": 6.106666666666667e-05,
|
|
"loss": 0.5607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5605010986328125,
|
|
"step": 230,
|
|
"valid_targets_mean": 28722.3,
|
|
"valid_targets_min": 22420
|
|
},
|
|
{
|
|
"epoch": 0.2508004268943437,
|
|
"grad_norm": 0.650437313972496,
|
|
"learning_rate": 6.24e-05,
|
|
"loss": 0.5587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5637643933296204,
|
|
"step": 235,
|
|
"valid_targets_mean": 28703.3,
|
|
"valid_targets_min": 21783
|
|
},
|
|
{
|
|
"epoch": 0.256136606189968,
|
|
"grad_norm": 0.4374501963289653,
|
|
"learning_rate": 6.373333333333333e-05,
|
|
"loss": 0.5571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.558539092540741,
|
|
"step": 240,
|
|
"valid_targets_mean": 28831.0,
|
|
"valid_targets_min": 19105
|
|
},
|
|
{
|
|
"epoch": 0.26147278548559233,
|
|
"grad_norm": 0.5120460721645188,
|
|
"learning_rate": 6.506666666666666e-05,
|
|
"loss": 0.5562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5536611080169678,
|
|
"step": 245,
|
|
"valid_targets_mean": 28847.3,
|
|
"valid_targets_min": 20476
|
|
},
|
|
{
|
|
"epoch": 0.26680896478121663,
|
|
"grad_norm": 0.6197628197231306,
|
|
"learning_rate": 6.64e-05,
|
|
"loss": 0.5557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5548427104949951,
|
|
"step": 250,
|
|
"valid_targets_mean": 28817.5,
|
|
"valid_targets_min": 19630
|
|
},
|
|
{
|
|
"epoch": 0.272145144076841,
|
|
"grad_norm": 0.71234145392978,
|
|
"learning_rate": 6.773333333333333e-05,
|
|
"loss": 0.5573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5544036626815796,
|
|
"step": 255,
|
|
"valid_targets_mean": 28778.9,
|
|
"valid_targets_min": 20199
|
|
},
|
|
{
|
|
"epoch": 0.27748132337246534,
|
|
"grad_norm": 0.6513418768796597,
|
|
"learning_rate": 6.906666666666667e-05,
|
|
"loss": 0.5536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5518766045570374,
|
|
"step": 260,
|
|
"valid_targets_mean": 28928.9,
|
|
"valid_targets_min": 20515
|
|
},
|
|
{
|
|
"epoch": 0.28281750266808964,
|
|
"grad_norm": 0.6264548727276208,
|
|
"learning_rate": 7.04e-05,
|
|
"loss": 0.5542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5519798994064331,
|
|
"step": 265,
|
|
"valid_targets_mean": 28822.4,
|
|
"valid_targets_min": 18953
|
|
},
|
|
{
|
|
"epoch": 0.288153681963714,
|
|
"grad_norm": 0.9786656061806465,
|
|
"learning_rate": 7.173333333333335e-05,
|
|
"loss": 0.5552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5583288669586182,
|
|
"step": 270,
|
|
"valid_targets_mean": 28753.7,
|
|
"valid_targets_min": 20559
|
|
},
|
|
{
|
|
"epoch": 0.2934898612593383,
|
|
"grad_norm": 0.49572263802680083,
|
|
"learning_rate": 7.306666666666668e-05,
|
|
"loss": 0.5525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5550556182861328,
|
|
"step": 275,
|
|
"valid_targets_mean": 28987.9,
|
|
"valid_targets_min": 19281
|
|
},
|
|
{
|
|
"epoch": 0.29882604055496265,
|
|
"grad_norm": 0.6362766358978091,
|
|
"learning_rate": 7.44e-05,
|
|
"loss": 0.5558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.549485445022583,
|
|
"step": 280,
|
|
"valid_targets_mean": 28889.4,
|
|
"valid_targets_min": 19873
|
|
},
|
|
{
|
|
"epoch": 0.304162219850587,
|
|
"grad_norm": 0.946560173004172,
|
|
"learning_rate": 7.573333333333334e-05,
|
|
"loss": 0.5542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5535620450973511,
|
|
"step": 285,
|
|
"valid_targets_mean": 29046.5,
|
|
"valid_targets_min": 23402
|
|
},
|
|
{
|
|
"epoch": 0.3094983991462113,
|
|
"grad_norm": 0.6213054757676977,
|
|
"learning_rate": 7.706666666666668e-05,
|
|
"loss": 0.5512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5472004413604736,
|
|
"step": 290,
|
|
"valid_targets_mean": 28608.9,
|
|
"valid_targets_min": 17886
|
|
},
|
|
{
|
|
"epoch": 0.31483457844183566,
|
|
"grad_norm": 0.8231565352486132,
|
|
"learning_rate": 7.840000000000001e-05,
|
|
"loss": 0.5513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5477820038795471,
|
|
"step": 295,
|
|
"valid_targets_mean": 28731.2,
|
|
"valid_targets_min": 21340
|
|
},
|
|
{
|
|
"epoch": 0.32017075773745995,
|
|
"grad_norm": 0.7927421222958255,
|
|
"learning_rate": 7.973333333333334e-05,
|
|
"loss": 0.5508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5506869554519653,
|
|
"step": 300,
|
|
"valid_targets_mean": 28976.3,
|
|
"valid_targets_min": 22880
|
|
},
|
|
{
|
|
"epoch": 0.3255069370330843,
|
|
"grad_norm": 0.7032608359190291,
|
|
"learning_rate": 8.106666666666667e-05,
|
|
"loss": 0.5524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5539842844009399,
|
|
"step": 305,
|
|
"valid_targets_mean": 28901.7,
|
|
"valid_targets_min": 21855
|
|
},
|
|
{
|
|
"epoch": 0.33084311632870866,
|
|
"grad_norm": 0.6731165496458789,
|
|
"learning_rate": 8.24e-05,
|
|
"loss": 0.5456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.538582444190979,
|
|
"step": 310,
|
|
"valid_targets_mean": 28647.1,
|
|
"valid_targets_min": 17737
|
|
},
|
|
{
|
|
"epoch": 0.33617929562433296,
|
|
"grad_norm": 0.7605105711198032,
|
|
"learning_rate": 8.373333333333334e-05,
|
|
"loss": 0.5485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5467817783355713,
|
|
"step": 315,
|
|
"valid_targets_mean": 28880.8,
|
|
"valid_targets_min": 19209
|
|
},
|
|
{
|
|
"epoch": 0.3415154749199573,
|
|
"grad_norm": 0.7701934663081149,
|
|
"learning_rate": 8.506666666666667e-05,
|
|
"loss": 0.5457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5447990894317627,
|
|
"step": 320,
|
|
"valid_targets_mean": 28782.5,
|
|
"valid_targets_min": 21917
|
|
},
|
|
{
|
|
"epoch": 0.3468516542155816,
|
|
"grad_norm": 0.5887577074410728,
|
|
"learning_rate": 8.64e-05,
|
|
"loss": 0.55,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5493910312652588,
|
|
"step": 325,
|
|
"valid_targets_mean": 28818.9,
|
|
"valid_targets_min": 20918
|
|
},
|
|
{
|
|
"epoch": 0.35218783351120597,
|
|
"grad_norm": 0.7582401178842324,
|
|
"learning_rate": 8.773333333333333e-05,
|
|
"loss": 0.546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5477738380432129,
|
|
"step": 330,
|
|
"valid_targets_mean": 28822.9,
|
|
"valid_targets_min": 21199
|
|
},
|
|
{
|
|
"epoch": 0.3575240128068303,
|
|
"grad_norm": 0.637643072735598,
|
|
"learning_rate": 8.906666666666667e-05,
|
|
"loss": 0.5464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.54499351978302,
|
|
"step": 335,
|
|
"valid_targets_mean": 28851.6,
|
|
"valid_targets_min": 20387
|
|
},
|
|
{
|
|
"epoch": 0.3628601921024546,
|
|
"grad_norm": 0.6724352862905262,
|
|
"learning_rate": 9.04e-05,
|
|
"loss": 0.5445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5406051874160767,
|
|
"step": 340,
|
|
"valid_targets_mean": 28807.4,
|
|
"valid_targets_min": 20681
|
|
},
|
|
{
|
|
"epoch": 0.368196371398079,
|
|
"grad_norm": 0.679305938726839,
|
|
"learning_rate": 9.173333333333333e-05,
|
|
"loss": 0.5452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5453157424926758,
|
|
"step": 345,
|
|
"valid_targets_mean": 28767.2,
|
|
"valid_targets_min": 17033
|
|
},
|
|
{
|
|
"epoch": 0.37353255069370334,
|
|
"grad_norm": 0.7960103561430129,
|
|
"learning_rate": 9.306666666666667e-05,
|
|
"loss": 0.5437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5461529493331909,
|
|
"step": 350,
|
|
"valid_targets_mean": 28861.1,
|
|
"valid_targets_min": 20183
|
|
},
|
|
{
|
|
"epoch": 0.37886872998932764,
|
|
"grad_norm": 0.7876674594635931,
|
|
"learning_rate": 9.44e-05,
|
|
"loss": 0.5431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5420316457748413,
|
|
"step": 355,
|
|
"valid_targets_mean": 28843.2,
|
|
"valid_targets_min": 18915
|
|
},
|
|
{
|
|
"epoch": 0.384204909284952,
|
|
"grad_norm": 0.5866144039786133,
|
|
"learning_rate": 9.573333333333335e-05,
|
|
"loss": 0.5407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5363604426383972,
|
|
"step": 360,
|
|
"valid_targets_mean": 28832.0,
|
|
"valid_targets_min": 22102
|
|
},
|
|
{
|
|
"epoch": 0.3895410885805763,
|
|
"grad_norm": 0.8109992871458446,
|
|
"learning_rate": 9.706666666666668e-05,
|
|
"loss": 0.5418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5428715944290161,
|
|
"step": 365,
|
|
"valid_targets_mean": 28704.1,
|
|
"valid_targets_min": 19957
|
|
},
|
|
{
|
|
"epoch": 0.39487726787620064,
|
|
"grad_norm": 0.8409633161647663,
|
|
"learning_rate": 9.84e-05,
|
|
"loss": 0.5414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5436188578605652,
|
|
"step": 370,
|
|
"valid_targets_mean": 28830.3,
|
|
"valid_targets_min": 21361
|
|
},
|
|
{
|
|
"epoch": 0.400213447171825,
|
|
"grad_norm": 0.6132368932425373,
|
|
"learning_rate": 9.973333333333334e-05,
|
|
"loss": 0.5422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5386438369750977,
|
|
"step": 375,
|
|
"valid_targets_mean": 28820.3,
|
|
"valid_targets_min": 22322
|
|
},
|
|
{
|
|
"epoch": 0.4055496264674493,
|
|
"grad_norm": 0.8058492330870486,
|
|
"learning_rate": 0.00010106666666666667,
|
|
"loss": 0.5416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.53936368227005,
|
|
"step": 380,
|
|
"valid_targets_mean": 28791.6,
|
|
"valid_targets_min": 21860
|
|
},
|
|
{
|
|
"epoch": 0.41088580576307365,
|
|
"grad_norm": 0.7700529206677325,
|
|
"learning_rate": 0.00010240000000000001,
|
|
"loss": 0.5404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5447088479995728,
|
|
"step": 385,
|
|
"valid_targets_mean": 28704.1,
|
|
"valid_targets_min": 20010
|
|
},
|
|
{
|
|
"epoch": 0.41622198505869795,
|
|
"grad_norm": 0.8893683706573925,
|
|
"learning_rate": 0.00010373333333333335,
|
|
"loss": 0.5397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5416263341903687,
|
|
"step": 390,
|
|
"valid_targets_mean": 28805.6,
|
|
"valid_targets_min": 20428
|
|
},
|
|
{
|
|
"epoch": 0.4215581643543223,
|
|
"grad_norm": 1.0117185257792445,
|
|
"learning_rate": 0.00010506666666666667,
|
|
"loss": 0.5425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5466989874839783,
|
|
"step": 395,
|
|
"valid_targets_mean": 28593.4,
|
|
"valid_targets_min": 20891
|
|
},
|
|
{
|
|
"epoch": 0.42689434364994666,
|
|
"grad_norm": 0.8925858685066481,
|
|
"learning_rate": 0.00010640000000000001,
|
|
"loss": 0.5411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5401574969291687,
|
|
"step": 400,
|
|
"valid_targets_mean": 29122.6,
|
|
"valid_targets_min": 19030
|
|
},
|
|
{
|
|
"epoch": 0.43223052294557096,
|
|
"grad_norm": 0.7349507561363565,
|
|
"learning_rate": 0.00010773333333333333,
|
|
"loss": 0.5399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5398991107940674,
|
|
"step": 405,
|
|
"valid_targets_mean": 28741.1,
|
|
"valid_targets_min": 21502
|
|
},
|
|
{
|
|
"epoch": 0.4375667022411953,
|
|
"grad_norm": 1.198184363542487,
|
|
"learning_rate": 0.00010906666666666667,
|
|
"loss": 0.5389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5372830033302307,
|
|
"step": 410,
|
|
"valid_targets_mean": 28847.6,
|
|
"valid_targets_min": 22709
|
|
},
|
|
{
|
|
"epoch": 0.4429028815368196,
|
|
"grad_norm": 0.9502266248869049,
|
|
"learning_rate": 0.00011040000000000001,
|
|
"loss": 0.5394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5405418276786804,
|
|
"step": 415,
|
|
"valid_targets_mean": 28784.6,
|
|
"valid_targets_min": 19776
|
|
},
|
|
{
|
|
"epoch": 0.44823906083244397,
|
|
"grad_norm": 0.7936068497230154,
|
|
"learning_rate": 0.00011173333333333333,
|
|
"loss": 0.539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5384629964828491,
|
|
"step": 420,
|
|
"valid_targets_mean": 28831.8,
|
|
"valid_targets_min": 23168
|
|
},
|
|
{
|
|
"epoch": 0.4535752401280683,
|
|
"grad_norm": 0.7204539211375129,
|
|
"learning_rate": 0.00011306666666666667,
|
|
"loss": 0.5382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5382857322692871,
|
|
"step": 425,
|
|
"valid_targets_mean": 28729.3,
|
|
"valid_targets_min": 18829
|
|
},
|
|
{
|
|
"epoch": 0.4589114194236926,
|
|
"grad_norm": 0.9715280954871497,
|
|
"learning_rate": 0.0001144,
|
|
"loss": 0.5345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5296086072921753,
|
|
"step": 430,
|
|
"valid_targets_mean": 28995.2,
|
|
"valid_targets_min": 21516
|
|
},
|
|
{
|
|
"epoch": 0.464247598719317,
|
|
"grad_norm": 1.1643790183048932,
|
|
"learning_rate": 0.00011573333333333333,
|
|
"loss": 0.5367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.537731409072876,
|
|
"step": 435,
|
|
"valid_targets_mean": 28795.0,
|
|
"valid_targets_min": 19489
|
|
},
|
|
{
|
|
"epoch": 0.4695837780149413,
|
|
"grad_norm": 0.8770740878776596,
|
|
"learning_rate": 0.00011706666666666668,
|
|
"loss": 0.5384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5397451519966125,
|
|
"step": 440,
|
|
"valid_targets_mean": 28636.4,
|
|
"valid_targets_min": 16244
|
|
},
|
|
{
|
|
"epoch": 0.47491995731056563,
|
|
"grad_norm": 0.9344853601422126,
|
|
"learning_rate": 0.0001184,
|
|
"loss": 0.5379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5392533540725708,
|
|
"step": 445,
|
|
"valid_targets_mean": 28694.7,
|
|
"valid_targets_min": 22274
|
|
},
|
|
{
|
|
"epoch": 0.48025613660619,
|
|
"grad_norm": 0.7668304201033254,
|
|
"learning_rate": 0.00011973333333333335,
|
|
"loss": 0.5365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5335771441459656,
|
|
"step": 450,
|
|
"valid_targets_mean": 28841.2,
|
|
"valid_targets_min": 21563
|
|
},
|
|
{
|
|
"epoch": 0.4855923159018143,
|
|
"grad_norm": 0.6698987095508048,
|
|
"learning_rate": 0.00012106666666666666,
|
|
"loss": 0.5357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.534684956073761,
|
|
"step": 455,
|
|
"valid_targets_mean": 28906.0,
|
|
"valid_targets_min": 21832
|
|
},
|
|
{
|
|
"epoch": 0.49092849519743864,
|
|
"grad_norm": 0.9076765335380783,
|
|
"learning_rate": 0.0001224,
|
|
"loss": 0.5338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5326440334320068,
|
|
"step": 460,
|
|
"valid_targets_mean": 28854.4,
|
|
"valid_targets_min": 22257
|
|
},
|
|
{
|
|
"epoch": 0.49626467449306294,
|
|
"grad_norm": 0.8366795432780757,
|
|
"learning_rate": 0.00012373333333333335,
|
|
"loss": 0.5304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5290672183036804,
|
|
"step": 465,
|
|
"valid_targets_mean": 28736.9,
|
|
"valid_targets_min": 18357
|
|
},
|
|
{
|
|
"epoch": 0.5016008537886874,
|
|
"grad_norm": 1.0802707635836115,
|
|
"learning_rate": 0.00012506666666666665,
|
|
"loss": 0.5353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5346341133117676,
|
|
"step": 470,
|
|
"valid_targets_mean": 28902.0,
|
|
"valid_targets_min": 22967
|
|
},
|
|
{
|
|
"epoch": 0.5069370330843116,
|
|
"grad_norm": 0.9609279719889722,
|
|
"learning_rate": 0.0001264,
|
|
"loss": 0.5326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5342420935630798,
|
|
"step": 475,
|
|
"valid_targets_mean": 28665.1,
|
|
"valid_targets_min": 21412
|
|
},
|
|
{
|
|
"epoch": 0.512273212379936,
|
|
"grad_norm": 1.3158372695521554,
|
|
"learning_rate": 0.00012773333333333334,
|
|
"loss": 0.5344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5368781089782715,
|
|
"step": 480,
|
|
"valid_targets_mean": 28766.8,
|
|
"valid_targets_min": 19331
|
|
},
|
|
{
|
|
"epoch": 0.5176093916755603,
|
|
"grad_norm": 0.9795998874478892,
|
|
"learning_rate": 0.00012906666666666667,
|
|
"loss": 0.5342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5322054624557495,
|
|
"step": 485,
|
|
"valid_targets_mean": 28829.6,
|
|
"valid_targets_min": 20404
|
|
},
|
|
{
|
|
"epoch": 0.5229455709711847,
|
|
"grad_norm": 0.6778702038700283,
|
|
"learning_rate": 0.0001304,
|
|
"loss": 0.5334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5259044170379639,
|
|
"step": 490,
|
|
"valid_targets_mean": 28834.2,
|
|
"valid_targets_min": 22315
|
|
},
|
|
{
|
|
"epoch": 0.528281750266809,
|
|
"grad_norm": 1.317650223882812,
|
|
"learning_rate": 0.00013173333333333333,
|
|
"loss": 0.5318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5341742038726807,
|
|
"step": 495,
|
|
"valid_targets_mean": 28683.8,
|
|
"valid_targets_min": 19502
|
|
},
|
|
{
|
|
"epoch": 0.5336179295624333,
|
|
"grad_norm": 1.0156689995894708,
|
|
"learning_rate": 0.00013306666666666668,
|
|
"loss": 0.535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.535778284072876,
|
|
"step": 500,
|
|
"valid_targets_mean": 28661.4,
|
|
"valid_targets_min": 20971
|
|
},
|
|
{
|
|
"epoch": 0.5389541088580576,
|
|
"grad_norm": 1.0878575161019286,
|
|
"learning_rate": 0.00013440000000000001,
|
|
"loss": 0.5318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5342371463775635,
|
|
"step": 505,
|
|
"valid_targets_mean": 28679.2,
|
|
"valid_targets_min": 18819
|
|
},
|
|
{
|
|
"epoch": 0.544290288153682,
|
|
"grad_norm": 1.0272627391559297,
|
|
"learning_rate": 0.00013573333333333334,
|
|
"loss": 0.5293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5333046913146973,
|
|
"step": 510,
|
|
"valid_targets_mean": 28750.6,
|
|
"valid_targets_min": 21745
|
|
},
|
|
{
|
|
"epoch": 0.5496264674493063,
|
|
"grad_norm": 0.8126962382300175,
|
|
"learning_rate": 0.00013706666666666667,
|
|
"loss": 0.5288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5308830738067627,
|
|
"step": 515,
|
|
"valid_targets_mean": 28827.4,
|
|
"valid_targets_min": 19230
|
|
},
|
|
{
|
|
"epoch": 0.5549626467449307,
|
|
"grad_norm": 0.7393375684190655,
|
|
"learning_rate": 0.0001384,
|
|
"loss": 0.5318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5321455597877502,
|
|
"step": 520,
|
|
"valid_targets_mean": 28860.7,
|
|
"valid_targets_min": 20761
|
|
},
|
|
{
|
|
"epoch": 0.5602988260405549,
|
|
"grad_norm": 0.791448603346915,
|
|
"learning_rate": 0.00013973333333333333,
|
|
"loss": 0.5311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5327831506729126,
|
|
"step": 525,
|
|
"valid_targets_mean": 28866.7,
|
|
"valid_targets_min": 21514
|
|
},
|
|
{
|
|
"epoch": 0.5656350053361793,
|
|
"grad_norm": 0.7635180566237668,
|
|
"learning_rate": 0.0001410666666666667,
|
|
"loss": 0.5292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5294798016548157,
|
|
"step": 530,
|
|
"valid_targets_mean": 28815.9,
|
|
"valid_targets_min": 22727
|
|
},
|
|
{
|
|
"epoch": 0.5709711846318036,
|
|
"grad_norm": 0.7299144969095928,
|
|
"learning_rate": 0.0001424,
|
|
"loss": 0.5276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5238897800445557,
|
|
"step": 535,
|
|
"valid_targets_mean": 28845.8,
|
|
"valid_targets_min": 22600
|
|
},
|
|
{
|
|
"epoch": 0.576307363927428,
|
|
"grad_norm": 0.9463246724807779,
|
|
"learning_rate": 0.00014373333333333335,
|
|
"loss": 0.5289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5301527976989746,
|
|
"step": 540,
|
|
"valid_targets_mean": 28835.9,
|
|
"valid_targets_min": 22871
|
|
},
|
|
{
|
|
"epoch": 0.5816435432230523,
|
|
"grad_norm": 0.7734584962993285,
|
|
"learning_rate": 0.00014506666666666668,
|
|
"loss": 0.5251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5256263017654419,
|
|
"step": 545,
|
|
"valid_targets_mean": 28784.2,
|
|
"valid_targets_min": 22150
|
|
},
|
|
{
|
|
"epoch": 0.5869797225186766,
|
|
"grad_norm": 1.2225580809817047,
|
|
"learning_rate": 0.0001464,
|
|
"loss": 0.5298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5331275463104248,
|
|
"step": 550,
|
|
"valid_targets_mean": 28789.1,
|
|
"valid_targets_min": 22110
|
|
},
|
|
{
|
|
"epoch": 0.5923159018143009,
|
|
"grad_norm": 0.7567944602442186,
|
|
"learning_rate": 0.00014773333333333334,
|
|
"loss": 0.5287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5319618582725525,
|
|
"step": 555,
|
|
"valid_targets_mean": 28775.0,
|
|
"valid_targets_min": 21938
|
|
},
|
|
{
|
|
"epoch": 0.5976520811099253,
|
|
"grad_norm": 0.9601392530255699,
|
|
"learning_rate": 0.00014906666666666667,
|
|
"loss": 0.5278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5328255295753479,
|
|
"step": 560,
|
|
"valid_targets_mean": 28883.7,
|
|
"valid_targets_min": 16759
|
|
},
|
|
{
|
|
"epoch": 0.6029882604055496,
|
|
"grad_norm": 1.1840261155964624,
|
|
"learning_rate": 0.0001504,
|
|
"loss": 0.5264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5287203788757324,
|
|
"step": 565,
|
|
"valid_targets_mean": 28914.4,
|
|
"valid_targets_min": 23391
|
|
},
|
|
{
|
|
"epoch": 0.608324439701174,
|
|
"grad_norm": 0.7959310356438406,
|
|
"learning_rate": 0.00015173333333333335,
|
|
"loss": 0.5298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5267726182937622,
|
|
"step": 570,
|
|
"valid_targets_mean": 28953.4,
|
|
"valid_targets_min": 22290
|
|
},
|
|
{
|
|
"epoch": 0.6136606189967982,
|
|
"grad_norm": 0.8250892679360178,
|
|
"learning_rate": 0.00015306666666666666,
|
|
"loss": 0.5272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5269379019737244,
|
|
"step": 575,
|
|
"valid_targets_mean": 28862.8,
|
|
"valid_targets_min": 20440
|
|
},
|
|
{
|
|
"epoch": 0.6189967982924226,
|
|
"grad_norm": 1.1422164796143262,
|
|
"learning_rate": 0.0001544,
|
|
"loss": 0.5277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5261090993881226,
|
|
"step": 580,
|
|
"valid_targets_mean": 28641.9,
|
|
"valid_targets_min": 13903
|
|
},
|
|
{
|
|
"epoch": 0.624332977588047,
|
|
"grad_norm": 0.6963629627094391,
|
|
"learning_rate": 0.00015573333333333334,
|
|
"loss": 0.5259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5219688415527344,
|
|
"step": 585,
|
|
"valid_targets_mean": 29003.0,
|
|
"valid_targets_min": 20872
|
|
},
|
|
{
|
|
"epoch": 0.6296691568836713,
|
|
"grad_norm": 0.9603755444061319,
|
|
"learning_rate": 0.00015706666666666667,
|
|
"loss": 0.5245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5274404287338257,
|
|
"step": 590,
|
|
"valid_targets_mean": 29018.0,
|
|
"valid_targets_min": 18214
|
|
},
|
|
{
|
|
"epoch": 0.6350053361792957,
|
|
"grad_norm": 0.9742014368447384,
|
|
"learning_rate": 0.00015840000000000003,
|
|
"loss": 0.5261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5234408378601074,
|
|
"step": 595,
|
|
"valid_targets_mean": 28871.5,
|
|
"valid_targets_min": 21415
|
|
},
|
|
{
|
|
"epoch": 0.6403415154749199,
|
|
"grad_norm": 1.1012816833594785,
|
|
"learning_rate": 0.00015973333333333333,
|
|
"loss": 0.5242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5250797271728516,
|
|
"step": 600,
|
|
"valid_targets_mean": 28953.2,
|
|
"valid_targets_min": 23044
|
|
},
|
|
{
|
|
"epoch": 0.6456776947705443,
|
|
"grad_norm": 1.17331547694115,
|
|
"learning_rate": 0.0001610666666666667,
|
|
"loss": 0.5263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5289738774299622,
|
|
"step": 605,
|
|
"valid_targets_mean": 29002.3,
|
|
"valid_targets_min": 23780
|
|
},
|
|
{
|
|
"epoch": 0.6510138740661686,
|
|
"grad_norm": 0.7971149521840055,
|
|
"learning_rate": 0.00016240000000000002,
|
|
"loss": 0.5238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5232728719711304,
|
|
"step": 610,
|
|
"valid_targets_mean": 28782.3,
|
|
"valid_targets_min": 21471
|
|
},
|
|
{
|
|
"epoch": 0.656350053361793,
|
|
"grad_norm": 0.8398684106900386,
|
|
"learning_rate": 0.00016373333333333335,
|
|
"loss": 0.5227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.525774359703064,
|
|
"step": 615,
|
|
"valid_targets_mean": 28889.3,
|
|
"valid_targets_min": 19959
|
|
},
|
|
{
|
|
"epoch": 0.6616862326574173,
|
|
"grad_norm": 0.7198912861291683,
|
|
"learning_rate": 0.00016506666666666668,
|
|
"loss": 0.5244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5229619741439819,
|
|
"step": 620,
|
|
"valid_targets_mean": 28760.8,
|
|
"valid_targets_min": 17978
|
|
},
|
|
{
|
|
"epoch": 0.6670224119530416,
|
|
"grad_norm": 0.9190832295110807,
|
|
"learning_rate": 0.0001664,
|
|
"loss": 0.5231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.52231764793396,
|
|
"step": 625,
|
|
"valid_targets_mean": 28903.8,
|
|
"valid_targets_min": 18702
|
|
},
|
|
{
|
|
"epoch": 0.6723585912486659,
|
|
"grad_norm": 0.9169151703365269,
|
|
"learning_rate": 0.00016773333333333334,
|
|
"loss": 0.5243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5236749649047852,
|
|
"step": 630,
|
|
"valid_targets_mean": 28919.6,
|
|
"valid_targets_min": 18070
|
|
},
|
|
{
|
|
"epoch": 0.6776947705442903,
|
|
"grad_norm": 0.9440014677930404,
|
|
"learning_rate": 0.0001690666666666667,
|
|
"loss": 0.5219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.519328236579895,
|
|
"step": 635,
|
|
"valid_targets_mean": 28835.2,
|
|
"valid_targets_min": 22006
|
|
},
|
|
{
|
|
"epoch": 0.6830309498399146,
|
|
"grad_norm": 1.1226009385172517,
|
|
"learning_rate": 0.0001704,
|
|
"loss": 0.5226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.524982213973999,
|
|
"step": 640,
|
|
"valid_targets_mean": 28876.1,
|
|
"valid_targets_min": 17173
|
|
},
|
|
{
|
|
"epoch": 0.688367129135539,
|
|
"grad_norm": 0.7893843871981923,
|
|
"learning_rate": 0.00017173333333333335,
|
|
"loss": 0.5218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5177816152572632,
|
|
"step": 645,
|
|
"valid_targets_mean": 28941.9,
|
|
"valid_targets_min": 23033
|
|
},
|
|
{
|
|
"epoch": 0.6937033084311632,
|
|
"grad_norm": 0.9900573740334639,
|
|
"learning_rate": 0.00017306666666666665,
|
|
"loss": 0.5232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5200702548027039,
|
|
"step": 650,
|
|
"valid_targets_mean": 28843.3,
|
|
"valid_targets_min": 22189
|
|
},
|
|
{
|
|
"epoch": 0.6990394877267876,
|
|
"grad_norm": 1.050873220431905,
|
|
"learning_rate": 0.0001744,
|
|
"loss": 0.5218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.521094799041748,
|
|
"step": 655,
|
|
"valid_targets_mean": 28854.3,
|
|
"valid_targets_min": 22418
|
|
},
|
|
{
|
|
"epoch": 0.7043756670224119,
|
|
"grad_norm": 0.9507076490054547,
|
|
"learning_rate": 0.00017573333333333334,
|
|
"loss": 0.521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5252164006233215,
|
|
"step": 660,
|
|
"valid_targets_mean": 28942.5,
|
|
"valid_targets_min": 21262
|
|
},
|
|
{
|
|
"epoch": 0.7097118463180363,
|
|
"grad_norm": 0.9417609906453398,
|
|
"learning_rate": 0.00017706666666666667,
|
|
"loss": 0.5215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5187242031097412,
|
|
"step": 665,
|
|
"valid_targets_mean": 28845.2,
|
|
"valid_targets_min": 19004
|
|
},
|
|
{
|
|
"epoch": 0.7150480256136607,
|
|
"grad_norm": 0.7494082136939058,
|
|
"learning_rate": 0.0001784,
|
|
"loss": 0.5203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5231742858886719,
|
|
"step": 670,
|
|
"valid_targets_mean": 28843.6,
|
|
"valid_targets_min": 19456
|
|
},
|
|
{
|
|
"epoch": 0.7203842049092849,
|
|
"grad_norm": 1.0576258309034712,
|
|
"learning_rate": 0.00017973333333333333,
|
|
"loss": 0.5228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5267496109008789,
|
|
"step": 675,
|
|
"valid_targets_mean": 28791.6,
|
|
"valid_targets_min": 18590
|
|
},
|
|
{
|
|
"epoch": 0.7257203842049093,
|
|
"grad_norm": 1.0947745640355913,
|
|
"learning_rate": 0.00018106666666666669,
|
|
"loss": 0.5218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5197322368621826,
|
|
"step": 680,
|
|
"valid_targets_mean": 28867.4,
|
|
"valid_targets_min": 23073
|
|
},
|
|
{
|
|
"epoch": 0.7310565635005336,
|
|
"grad_norm": 0.8804688508632426,
|
|
"learning_rate": 0.00018240000000000002,
|
|
"loss": 0.5181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5168286561965942,
|
|
"step": 685,
|
|
"valid_targets_mean": 28865.1,
|
|
"valid_targets_min": 21222
|
|
},
|
|
{
|
|
"epoch": 0.736392742796158,
|
|
"grad_norm": 0.8377434122835627,
|
|
"learning_rate": 0.00018373333333333335,
|
|
"loss": 0.5199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5161206126213074,
|
|
"step": 690,
|
|
"valid_targets_mean": 28880.2,
|
|
"valid_targets_min": 19297
|
|
},
|
|
{
|
|
"epoch": 0.7417289220917823,
|
|
"grad_norm": 1.130069939334258,
|
|
"learning_rate": 0.00018506666666666667,
|
|
"loss": 0.5186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5214225053787231,
|
|
"step": 695,
|
|
"valid_targets_mean": 28741.1,
|
|
"valid_targets_min": 19445
|
|
},
|
|
{
|
|
"epoch": 0.7470651013874067,
|
|
"grad_norm": 0.8103866371897044,
|
|
"learning_rate": 0.00018640000000000003,
|
|
"loss": 0.5176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5210945010185242,
|
|
"step": 700,
|
|
"valid_targets_mean": 28913.4,
|
|
"valid_targets_min": 22165
|
|
},
|
|
{
|
|
"epoch": 0.7524012806830309,
|
|
"grad_norm": 0.9569292836049637,
|
|
"learning_rate": 0.00018773333333333333,
|
|
"loss": 0.5171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5173620581626892,
|
|
"step": 705,
|
|
"valid_targets_mean": 28761.6,
|
|
"valid_targets_min": 16988
|
|
},
|
|
{
|
|
"epoch": 0.7577374599786553,
|
|
"grad_norm": 1.0348041744035754,
|
|
"learning_rate": 0.0001890666666666667,
|
|
"loss": 0.5182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5234087109565735,
|
|
"step": 710,
|
|
"valid_targets_mean": 28747.3,
|
|
"valid_targets_min": 17862
|
|
},
|
|
{
|
|
"epoch": 0.7630736392742796,
|
|
"grad_norm": 0.6395456940178995,
|
|
"learning_rate": 0.0001904,
|
|
"loss": 0.5134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5095891952514648,
|
|
"step": 715,
|
|
"valid_targets_mean": 28773.6,
|
|
"valid_targets_min": 19732
|
|
},
|
|
{
|
|
"epoch": 0.768409818569904,
|
|
"grad_norm": 0.9053417720870189,
|
|
"learning_rate": 0.00019173333333333335,
|
|
"loss": 0.5173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.516966700553894,
|
|
"step": 720,
|
|
"valid_targets_mean": 28985.3,
|
|
"valid_targets_min": 21313
|
|
},
|
|
{
|
|
"epoch": 0.7737459978655283,
|
|
"grad_norm": 1.1812927567242297,
|
|
"learning_rate": 0.00019306666666666668,
|
|
"loss": 0.516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5194029808044434,
|
|
"step": 725,
|
|
"valid_targets_mean": 28903.3,
|
|
"valid_targets_min": 18221
|
|
},
|
|
{
|
|
"epoch": 0.7790821771611526,
|
|
"grad_norm": 1.1415316033642284,
|
|
"learning_rate": 0.0001944,
|
|
"loss": 0.5167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5210375785827637,
|
|
"step": 730,
|
|
"valid_targets_mean": 28849.7,
|
|
"valid_targets_min": 20021
|
|
},
|
|
{
|
|
"epoch": 0.7844183564567769,
|
|
"grad_norm": 0.9275823164638465,
|
|
"learning_rate": 0.00019573333333333334,
|
|
"loss": 0.5156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5161601305007935,
|
|
"step": 735,
|
|
"valid_targets_mean": 28839.5,
|
|
"valid_targets_min": 21489
|
|
},
|
|
{
|
|
"epoch": 0.7897545357524013,
|
|
"grad_norm": 1.0879808817539458,
|
|
"learning_rate": 0.00019706666666666667,
|
|
"loss": 0.5153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5184020400047302,
|
|
"step": 740,
|
|
"valid_targets_mean": 28919.2,
|
|
"valid_targets_min": 21215
|
|
},
|
|
{
|
|
"epoch": 0.7950907150480256,
|
|
"grad_norm": 0.9695487034080649,
|
|
"learning_rate": 0.0001984,
|
|
"loss": 0.514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5139423608779907,
|
|
"step": 745,
|
|
"valid_targets_mean": 28771.1,
|
|
"valid_targets_min": 21422
|
|
},
|
|
{
|
|
"epoch": 0.80042689434365,
|
|
"grad_norm": 0.7526157185372891,
|
|
"learning_rate": 0.00019973333333333335,
|
|
"loss": 0.5153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5156712532043457,
|
|
"step": 750,
|
|
"valid_targets_mean": 28994.6,
|
|
"valid_targets_min": 22465
|
|
},
|
|
{
|
|
"epoch": 0.8057630736392742,
|
|
"grad_norm": 0.8534620058988129,
|
|
"learning_rate": 0.00019999982650101714,
|
|
"loss": 0.5113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5072367787361145,
|
|
"step": 755,
|
|
"valid_targets_mean": 28920.0,
|
|
"valid_targets_min": 18753
|
|
},
|
|
{
|
|
"epoch": 0.8110992529348986,
|
|
"grad_norm": 0.9207936626555079,
|
|
"learning_rate": 0.00019999912166243107,
|
|
"loss": 0.5131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.512698769569397,
|
|
"step": 760,
|
|
"valid_targets_mean": 29037.0,
|
|
"valid_targets_min": 19954
|
|
},
|
|
{
|
|
"epoch": 0.816435432230523,
|
|
"grad_norm": 0.5737733806118188,
|
|
"learning_rate": 0.00019999787464437395,
|
|
"loss": 0.5116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5104231238365173,
|
|
"step": 765,
|
|
"valid_targets_mean": 28870.6,
|
|
"valid_targets_min": 18563
|
|
},
|
|
{
|
|
"epoch": 0.8217716115261473,
|
|
"grad_norm": 0.9923597897717296,
|
|
"learning_rate": 0.0001999960854536069,
|
|
"loss": 0.5189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5126428604125977,
|
|
"step": 770,
|
|
"valid_targets_mean": 28757.4,
|
|
"valid_targets_min": 22298
|
|
},
|
|
{
|
|
"epoch": 0.8271077908217717,
|
|
"grad_norm": 0.9603156077658574,
|
|
"learning_rate": 0.00019999375409983057,
|
|
"loss": 0.5155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5126303434371948,
|
|
"step": 775,
|
|
"valid_targets_mean": 28917.4,
|
|
"valid_targets_min": 21333
|
|
},
|
|
{
|
|
"epoch": 0.8324439701173959,
|
|
"grad_norm": 0.8981896229393682,
|
|
"learning_rate": 0.00019999088059568536,
|
|
"loss": 0.5129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5156265497207642,
|
|
"step": 780,
|
|
"valid_targets_mean": 28922.4,
|
|
"valid_targets_min": 23138
|
|
},
|
|
{
|
|
"epoch": 0.8377801494130203,
|
|
"grad_norm": 0.7400545260515701,
|
|
"learning_rate": 0.0001999874649567508,
|
|
"loss": 0.5128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5122023820877075,
|
|
"step": 785,
|
|
"valid_targets_mean": 28929.7,
|
|
"valid_targets_min": 22171
|
|
},
|
|
{
|
|
"epoch": 0.8431163287086446,
|
|
"grad_norm": 0.8913220008247429,
|
|
"learning_rate": 0.00019998350720154605,
|
|
"loss": 0.513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5137268304824829,
|
|
"step": 790,
|
|
"valid_targets_mean": 28965.1,
|
|
"valid_targets_min": 19535
|
|
},
|
|
{
|
|
"epoch": 0.848452508004269,
|
|
"grad_norm": 0.6618361104222048,
|
|
"learning_rate": 0.00019997900735152936,
|
|
"loss": 0.5111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5073462724685669,
|
|
"step": 795,
|
|
"valid_targets_mean": 28941.9,
|
|
"valid_targets_min": 20187
|
|
},
|
|
{
|
|
"epoch": 0.8537886872998933,
|
|
"grad_norm": 0.9835517191921062,
|
|
"learning_rate": 0.0001999739654310982,
|
|
"loss": 0.514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5118036270141602,
|
|
"step": 800,
|
|
"valid_targets_mean": 28857.4,
|
|
"valid_targets_min": 20301
|
|
},
|
|
{
|
|
"epoch": 0.8591248665955176,
|
|
"grad_norm": 0.7167219489963145,
|
|
"learning_rate": 0.00019996838146758915,
|
|
"loss": 0.5115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.513766884803772,
|
|
"step": 805,
|
|
"valid_targets_mean": 28856.1,
|
|
"valid_targets_min": 19253
|
|
},
|
|
{
|
|
"epoch": 0.8644610458911419,
|
|
"grad_norm": 0.8234050116161585,
|
|
"learning_rate": 0.00019996225549127753,
|
|
"loss": 0.5119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5139243006706238,
|
|
"step": 810,
|
|
"valid_targets_mean": 28901.4,
|
|
"valid_targets_min": 21836
|
|
},
|
|
{
|
|
"epoch": 0.8697972251867663,
|
|
"grad_norm": 1.0887508688343202,
|
|
"learning_rate": 0.0001999555875353774,
|
|
"loss": 0.5111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5112956762313843,
|
|
"step": 815,
|
|
"valid_targets_mean": 28909.4,
|
|
"valid_targets_min": 20239
|
|
},
|
|
{
|
|
"epoch": 0.8751334044823906,
|
|
"grad_norm": 0.876878433615169,
|
|
"learning_rate": 0.0001999483776360414,
|
|
"loss": 0.5101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5132863521575928,
|
|
"step": 820,
|
|
"valid_targets_mean": 28695.7,
|
|
"valid_targets_min": 18285
|
|
},
|
|
{
|
|
"epoch": 0.880469583778015,
|
|
"grad_norm": 0.8413466528813544,
|
|
"learning_rate": 0.00019994062583236045,
|
|
"loss": 0.5078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5094743967056274,
|
|
"step": 825,
|
|
"valid_targets_mean": 28791.7,
|
|
"valid_targets_min": 20225
|
|
},
|
|
{
|
|
"epoch": 0.8858057630736392,
|
|
"grad_norm": 0.8600849420247135,
|
|
"learning_rate": 0.00019993233216636358,
|
|
"loss": 0.5115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5101096630096436,
|
|
"step": 830,
|
|
"valid_targets_mean": 28825.4,
|
|
"valid_targets_min": 22899
|
|
},
|
|
{
|
|
"epoch": 0.8911419423692636,
|
|
"grad_norm": 0.6651167236591127,
|
|
"learning_rate": 0.0001999234966830178,
|
|
"loss": 0.5083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5113962888717651,
|
|
"step": 835,
|
|
"valid_targets_mean": 28849.6,
|
|
"valid_targets_min": 21756
|
|
},
|
|
{
|
|
"epoch": 0.8964781216648879,
|
|
"grad_norm": 1.0818540858113659,
|
|
"learning_rate": 0.00019991411943022768,
|
|
"loss": 0.5114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5127806067466736,
|
|
"step": 840,
|
|
"valid_targets_mean": 29012.6,
|
|
"valid_targets_min": 20124
|
|
},
|
|
{
|
|
"epoch": 0.9018143009605123,
|
|
"grad_norm": 0.7661149143265992,
|
|
"learning_rate": 0.00019990420045883518,
|
|
"loss": 0.5093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5133446455001831,
|
|
"step": 845,
|
|
"valid_targets_mean": 28692.7,
|
|
"valid_targets_min": 21532
|
|
},
|
|
{
|
|
"epoch": 0.9071504802561366,
|
|
"grad_norm": 0.7850553702573292,
|
|
"learning_rate": 0.00019989373982261946,
|
|
"loss": 0.5075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.508018970489502,
|
|
"step": 850,
|
|
"valid_targets_mean": 28888.1,
|
|
"valid_targets_min": 22096
|
|
},
|
|
{
|
|
"epoch": 0.9124866595517609,
|
|
"grad_norm": 0.957979899606732,
|
|
"learning_rate": 0.0001998827375782964,
|
|
"loss": 0.508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5046581029891968,
|
|
"step": 855,
|
|
"valid_targets_mean": 28860.0,
|
|
"valid_targets_min": 18914
|
|
},
|
|
{
|
|
"epoch": 0.9178228388473852,
|
|
"grad_norm": 0.8046053319649763,
|
|
"learning_rate": 0.0001998711937855184,
|
|
"loss": 0.5064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5036020278930664,
|
|
"step": 860,
|
|
"valid_targets_mean": 28765.5,
|
|
"valid_targets_min": 17484
|
|
},
|
|
{
|
|
"epoch": 0.9231590181430096,
|
|
"grad_norm": 0.7647829514060208,
|
|
"learning_rate": 0.00019985910850687413,
|
|
"loss": 0.5087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5098488330841064,
|
|
"step": 865,
|
|
"valid_targets_mean": 28809.4,
|
|
"valid_targets_min": 21450
|
|
},
|
|
{
|
|
"epoch": 0.928495197438634,
|
|
"grad_norm": 0.8215795314493268,
|
|
"learning_rate": 0.00019984648180788804,
|
|
"loss": 0.5043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5045159459114075,
|
|
"step": 870,
|
|
"valid_targets_mean": 28818.4,
|
|
"valid_targets_min": 19185
|
|
},
|
|
{
|
|
"epoch": 0.9338313767342583,
|
|
"grad_norm": 0.8191498775289832,
|
|
"learning_rate": 0.00019983331375702016,
|
|
"loss": 0.5032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5012009143829346,
|
|
"step": 875,
|
|
"valid_targets_mean": 28765.6,
|
|
"valid_targets_min": 18225
|
|
},
|
|
{
|
|
"epoch": 0.9391675560298826,
|
|
"grad_norm": 0.6547442901261715,
|
|
"learning_rate": 0.00019981960442566553,
|
|
"loss": 0.5054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5023826360702515,
|
|
"step": 880,
|
|
"valid_targets_mean": 28933.4,
|
|
"valid_targets_min": 20988
|
|
},
|
|
{
|
|
"epoch": 0.9445037353255069,
|
|
"grad_norm": 0.7730738931584743,
|
|
"learning_rate": 0.000199805353888154,
|
|
"loss": 0.5052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5060917139053345,
|
|
"step": 885,
|
|
"valid_targets_mean": 28948.6,
|
|
"valid_targets_min": 18118
|
|
},
|
|
{
|
|
"epoch": 0.9498399146211313,
|
|
"grad_norm": 0.7811823083404794,
|
|
"learning_rate": 0.00019979056222174972,
|
|
"loss": 0.5058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5014211535453796,
|
|
"step": 890,
|
|
"valid_targets_mean": 28850.7,
|
|
"valid_targets_min": 22683
|
|
},
|
|
{
|
|
"epoch": 0.9551760939167556,
|
|
"grad_norm": 0.8319739955826252,
|
|
"learning_rate": 0.0001997752295066508,
|
|
"loss": 0.5051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5034403800964355,
|
|
"step": 895,
|
|
"valid_targets_mean": 29030.9,
|
|
"valid_targets_min": 19158
|
|
},
|
|
{
|
|
"epoch": 0.96051227321238,
|
|
"grad_norm": 0.7282233904577727,
|
|
"learning_rate": 0.0001997593558259888,
|
|
"loss": 0.5037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5004850625991821,
|
|
"step": 900,
|
|
"valid_targets_mean": 28900.0,
|
|
"valid_targets_min": 21973
|
|
},
|
|
{
|
|
"epoch": 0.9658484525080042,
|
|
"grad_norm": 1.3121763739243963,
|
|
"learning_rate": 0.00019974294126582826,
|
|
"loss": 0.5068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5165523886680603,
|
|
"step": 905,
|
|
"valid_targets_mean": 28758.2,
|
|
"valid_targets_min": 16583
|
|
},
|
|
{
|
|
"epoch": 0.9711846318036286,
|
|
"grad_norm": 1.0382108685703912,
|
|
"learning_rate": 0.00019972598591516642,
|
|
"loss": 0.5069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5057257413864136,
|
|
"step": 910,
|
|
"valid_targets_mean": 28712.1,
|
|
"valid_targets_min": 18488
|
|
},
|
|
{
|
|
"epoch": 0.9765208110992529,
|
|
"grad_norm": 0.7243104931637121,
|
|
"learning_rate": 0.00019970848986593248,
|
|
"loss": 0.5072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5118848085403442,
|
|
"step": 915,
|
|
"valid_targets_mean": 28952.7,
|
|
"valid_targets_min": 23291
|
|
},
|
|
{
|
|
"epoch": 0.9818569903948773,
|
|
"grad_norm": 0.7418714866265343,
|
|
"learning_rate": 0.00019969045321298725,
|
|
"loss": 0.5072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.506354033946991,
|
|
"step": 920,
|
|
"valid_targets_mean": 28869.9,
|
|
"valid_targets_min": 22004
|
|
},
|
|
{
|
|
"epoch": 0.9871931696905016,
|
|
"grad_norm": 0.7193766855257959,
|
|
"learning_rate": 0.00019967187605412262,
|
|
"loss": 0.5035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5024032592773438,
|
|
"step": 925,
|
|
"valid_targets_mean": 29032.0,
|
|
"valid_targets_min": 20042
|
|
},
|
|
{
|
|
"epoch": 0.9925293489861259,
|
|
"grad_norm": 0.8126562988133832,
|
|
"learning_rate": 0.00019965275849006102,
|
|
"loss": 0.5036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5021684765815735,
|
|
"step": 930,
|
|
"valid_targets_mean": 29001.4,
|
|
"valid_targets_min": 19151
|
|
},
|
|
{
|
|
"epoch": 0.9978655282817502,
|
|
"grad_norm": 0.729906175244711,
|
|
"learning_rate": 0.00019963310062445487,
|
|
"loss": 0.5022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.504224419593811,
|
|
"step": 935,
|
|
"valid_targets_mean": 28850.6,
|
|
"valid_targets_min": 22676
|
|
},
|
|
{
|
|
"epoch": 1.0032017075773747,
|
|
"grad_norm": 0.8857335048779751,
|
|
"learning_rate": 0.00019961290256388604,
|
|
"loss": 0.501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5056629776954651,
|
|
"step": 940,
|
|
"valid_targets_mean": 28686.6,
|
|
"valid_targets_min": 19870
|
|
},
|
|
{
|
|
"epoch": 1.0085378868729988,
|
|
"grad_norm": 0.7714220146823638,
|
|
"learning_rate": 0.00019959216441786524,
|
|
"loss": 0.4982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49670982360839844,
|
|
"step": 945,
|
|
"valid_targets_mean": 28700.9,
|
|
"valid_targets_min": 21769
|
|
},
|
|
{
|
|
"epoch": 1.0138740661686232,
|
|
"grad_norm": 0.8056427198462525,
|
|
"learning_rate": 0.00019957088629883135,
|
|
"loss": 0.4987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4962517023086548,
|
|
"step": 950,
|
|
"valid_targets_mean": 28952.8,
|
|
"valid_targets_min": 21902
|
|
},
|
|
{
|
|
"epoch": 1.0192102454642475,
|
|
"grad_norm": 0.5467811180272533,
|
|
"learning_rate": 0.00019954906832215103,
|
|
"loss": 0.4972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49847257137298584,
|
|
"step": 955,
|
|
"valid_targets_mean": 28823.4,
|
|
"valid_targets_min": 22059
|
|
},
|
|
{
|
|
"epoch": 1.024546424759872,
|
|
"grad_norm": 1.1696041007065114,
|
|
"learning_rate": 0.00019952671060611792,
|
|
"loss": 0.5008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5013498663902283,
|
|
"step": 960,
|
|
"valid_targets_mean": 28771.9,
|
|
"valid_targets_min": 19840
|
|
},
|
|
{
|
|
"epoch": 1.0298826040554963,
|
|
"grad_norm": 0.7944049884311958,
|
|
"learning_rate": 0.000199503813271952,
|
|
"loss": 0.5004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5025086402893066,
|
|
"step": 965,
|
|
"valid_targets_mean": 28858.1,
|
|
"valid_targets_min": 21155
|
|
},
|
|
{
|
|
"epoch": 1.0352187833511206,
|
|
"grad_norm": 0.6984525409939394,
|
|
"learning_rate": 0.00019948037644379905,
|
|
"loss": 0.4979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4993964433670044,
|
|
"step": 970,
|
|
"valid_targets_mean": 29067.5,
|
|
"valid_targets_min": 20386
|
|
},
|
|
{
|
|
"epoch": 1.040554962646745,
|
|
"grad_norm": 0.7610988833368267,
|
|
"learning_rate": 0.0001994564002487298,
|
|
"loss": 0.4993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5011273622512817,
|
|
"step": 975,
|
|
"valid_targets_mean": 28738.2,
|
|
"valid_targets_min": 19725
|
|
},
|
|
{
|
|
"epoch": 1.0458911419423693,
|
|
"grad_norm": 0.8782724630795262,
|
|
"learning_rate": 0.00019943188481673946,
|
|
"loss": 0.496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49535125494003296,
|
|
"step": 980,
|
|
"valid_targets_mean": 28957.1,
|
|
"valid_targets_min": 21418
|
|
},
|
|
{
|
|
"epoch": 1.0512273212379937,
|
|
"grad_norm": 0.8026106911230974,
|
|
"learning_rate": 0.00019940683028074675,
|
|
"loss": 0.4974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49761420488357544,
|
|
"step": 985,
|
|
"valid_targets_mean": 28961.3,
|
|
"valid_targets_min": 22587
|
|
},
|
|
{
|
|
"epoch": 1.056563500533618,
|
|
"grad_norm": 0.7959574826279312,
|
|
"learning_rate": 0.00019938123677659352,
|
|
"loss": 0.4973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49720531702041626,
|
|
"step": 990,
|
|
"valid_targets_mean": 29085.9,
|
|
"valid_targets_min": 23594
|
|
},
|
|
{
|
|
"epoch": 1.0618996798292422,
|
|
"grad_norm": 0.6285283346775284,
|
|
"learning_rate": 0.00019935510444304364,
|
|
"loss": 0.497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4930610656738281,
|
|
"step": 995,
|
|
"valid_targets_mean": 28967.6,
|
|
"valid_targets_min": 20145
|
|
},
|
|
{
|
|
"epoch": 1.0672358591248665,
|
|
"grad_norm": 0.9256561165916987,
|
|
"learning_rate": 0.00019932843342178254,
|
|
"loss": 0.4966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49736061692237854,
|
|
"step": 1000,
|
|
"valid_targets_mean": 29112.2,
|
|
"valid_targets_min": 23359
|
|
},
|
|
{
|
|
"epoch": 1.0725720384204909,
|
|
"grad_norm": 0.7313284825114021,
|
|
"learning_rate": 0.00019930122385741625,
|
|
"loss": 0.5005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5027332305908203,
|
|
"step": 1005,
|
|
"valid_targets_mean": 28745.6,
|
|
"valid_targets_min": 23079
|
|
},
|
|
{
|
|
"epoch": 1.0779082177161152,
|
|
"grad_norm": 0.7342069358731735,
|
|
"learning_rate": 0.00019927347589747082,
|
|
"loss": 0.4981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49375826120376587,
|
|
"step": 1010,
|
|
"valid_targets_mean": 29010.8,
|
|
"valid_targets_min": 19817
|
|
},
|
|
{
|
|
"epoch": 1.0832443970117396,
|
|
"grad_norm": 0.7672321385196467,
|
|
"learning_rate": 0.0001992451896923912,
|
|
"loss": 0.4957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49502408504486084,
|
|
"step": 1015,
|
|
"valid_targets_mean": 28889.4,
|
|
"valid_targets_min": 22578
|
|
},
|
|
{
|
|
"epoch": 1.088580576307364,
|
|
"grad_norm": 0.8271950543331492,
|
|
"learning_rate": 0.00019921636539554084,
|
|
"loss": 0.4977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4977359175682068,
|
|
"step": 1020,
|
|
"valid_targets_mean": 28720.5,
|
|
"valid_targets_min": 20101
|
|
},
|
|
{
|
|
"epoch": 1.0939167556029883,
|
|
"grad_norm": 0.7373693958339582,
|
|
"learning_rate": 0.00019918700316320044,
|
|
"loss": 0.4958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4936048686504364,
|
|
"step": 1025,
|
|
"valid_targets_mean": 29039.2,
|
|
"valid_targets_min": 21531
|
|
},
|
|
{
|
|
"epoch": 1.0992529348986126,
|
|
"grad_norm": 0.6151459246691149,
|
|
"learning_rate": 0.00019915710315456748,
|
|
"loss": 0.4927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4901975095272064,
|
|
"step": 1030,
|
|
"valid_targets_mean": 28976.8,
|
|
"valid_targets_min": 21388
|
|
},
|
|
{
|
|
"epoch": 1.104589114194237,
|
|
"grad_norm": 0.8842826137042493,
|
|
"learning_rate": 0.00019912666553175502,
|
|
"loss": 0.4944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4999387860298157,
|
|
"step": 1035,
|
|
"valid_targets_mean": 28779.4,
|
|
"valid_targets_min": 20977
|
|
},
|
|
{
|
|
"epoch": 1.1099252934898614,
|
|
"grad_norm": 0.809143508702625,
|
|
"learning_rate": 0.0001990956904597911,
|
|
"loss": 0.4937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48766613006591797,
|
|
"step": 1040,
|
|
"valid_targets_mean": 28873.3,
|
|
"valid_targets_min": 20283
|
|
},
|
|
{
|
|
"epoch": 1.1152614727854857,
|
|
"grad_norm": 0.8399523521804738,
|
|
"learning_rate": 0.0001990641781066177,
|
|
"loss": 0.4941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49525129795074463,
|
|
"step": 1045,
|
|
"valid_targets_mean": 28644.3,
|
|
"valid_targets_min": 18907
|
|
},
|
|
{
|
|
"epoch": 1.1205976520811098,
|
|
"grad_norm": 0.767585575338914,
|
|
"learning_rate": 0.00019903212864308975,
|
|
"loss": 0.4937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4969574809074402,
|
|
"step": 1050,
|
|
"valid_targets_mean": 28797.3,
|
|
"valid_targets_min": 21043
|
|
},
|
|
{
|
|
"epoch": 1.1259338313767342,
|
|
"grad_norm": 0.7413224378945623,
|
|
"learning_rate": 0.00019899954224297442,
|
|
"loss": 0.4953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49663180112838745,
|
|
"step": 1055,
|
|
"valid_targets_mean": 28732.7,
|
|
"valid_targets_min": 23116
|
|
},
|
|
{
|
|
"epoch": 1.1312700106723586,
|
|
"grad_norm": 0.7658592966316667,
|
|
"learning_rate": 0.00019896641908295006,
|
|
"loss": 0.4917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4922265410423279,
|
|
"step": 1060,
|
|
"valid_targets_mean": 28798.4,
|
|
"valid_targets_min": 20032
|
|
},
|
|
{
|
|
"epoch": 1.136606189967983,
|
|
"grad_norm": 0.6444301983737417,
|
|
"learning_rate": 0.00019893275934260523,
|
|
"loss": 0.4943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49562233686447144,
|
|
"step": 1065,
|
|
"valid_targets_mean": 28789.7,
|
|
"valid_targets_min": 21020
|
|
},
|
|
{
|
|
"epoch": 1.1419423692636073,
|
|
"grad_norm": 0.9172448797269533,
|
|
"learning_rate": 0.00019889856320443768,
|
|
"loss": 0.4932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4929046928882599,
|
|
"step": 1070,
|
|
"valid_targets_mean": 28764.4,
|
|
"valid_targets_min": 19307
|
|
},
|
|
{
|
|
"epoch": 1.1472785485592316,
|
|
"grad_norm": 0.726566080960612,
|
|
"learning_rate": 0.00019886383085385351,
|
|
"loss": 0.4949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49681541323661804,
|
|
"step": 1075,
|
|
"valid_targets_mean": 28777.3,
|
|
"valid_targets_min": 18981
|
|
},
|
|
{
|
|
"epoch": 1.152614727854856,
|
|
"grad_norm": 0.7444225763087141,
|
|
"learning_rate": 0.00019882856247916607,
|
|
"loss": 0.4939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.494374543428421,
|
|
"step": 1080,
|
|
"valid_targets_mean": 28757.2,
|
|
"valid_targets_min": 19339
|
|
},
|
|
{
|
|
"epoch": 1.1579509071504803,
|
|
"grad_norm": 0.6678571992056567,
|
|
"learning_rate": 0.00019879275827159486,
|
|
"loss": 0.4919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49021148681640625,
|
|
"step": 1085,
|
|
"valid_targets_mean": 28788.7,
|
|
"valid_targets_min": 21441
|
|
},
|
|
{
|
|
"epoch": 1.1632870864461047,
|
|
"grad_norm": 0.9789421150873826,
|
|
"learning_rate": 0.00019875641842526473,
|
|
"loss": 0.4913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49386441707611084,
|
|
"step": 1090,
|
|
"valid_targets_mean": 28768.3,
|
|
"valid_targets_min": 19307
|
|
},
|
|
{
|
|
"epoch": 1.1686232657417288,
|
|
"grad_norm": 0.7388801470880308,
|
|
"learning_rate": 0.00019871954313720454,
|
|
"loss": 0.4903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49126654863357544,
|
|
"step": 1095,
|
|
"valid_targets_mean": 28960.7,
|
|
"valid_targets_min": 20739
|
|
},
|
|
{
|
|
"epoch": 1.1739594450373532,
|
|
"grad_norm": 0.7313633738701135,
|
|
"learning_rate": 0.00019868213260734635,
|
|
"loss": 0.4921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4909404516220093,
|
|
"step": 1100,
|
|
"valid_targets_mean": 28717.3,
|
|
"valid_targets_min": 19151
|
|
},
|
|
{
|
|
"epoch": 1.1792956243329775,
|
|
"grad_norm": 0.6847916798801468,
|
|
"learning_rate": 0.00019864418703852406,
|
|
"loss": 0.4921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49161458015441895,
|
|
"step": 1105,
|
|
"valid_targets_mean": 28783.6,
|
|
"valid_targets_min": 18619
|
|
},
|
|
{
|
|
"epoch": 1.1846318036286019,
|
|
"grad_norm": 0.5992723218408188,
|
|
"learning_rate": 0.00019860570663647264,
|
|
"loss": 0.4931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49217694997787476,
|
|
"step": 1110,
|
|
"valid_targets_mean": 28852.7,
|
|
"valid_targets_min": 21815
|
|
},
|
|
{
|
|
"epoch": 1.1899679829242262,
|
|
"grad_norm": 0.7088694327208672,
|
|
"learning_rate": 0.00019856669160982674,
|
|
"loss": 0.4902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4884410500526428,
|
|
"step": 1115,
|
|
"valid_targets_mean": 28801.8,
|
|
"valid_targets_min": 20517
|
|
},
|
|
{
|
|
"epoch": 1.1953041622198506,
|
|
"grad_norm": 1.0128547253551068,
|
|
"learning_rate": 0.00019852714217011966,
|
|
"loss": 0.4901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4990355372428894,
|
|
"step": 1120,
|
|
"valid_targets_mean": 28929.9,
|
|
"valid_targets_min": 21103
|
|
},
|
|
{
|
|
"epoch": 1.200640341515475,
|
|
"grad_norm": 0.732187179688228,
|
|
"learning_rate": 0.0001984870585317823,
|
|
"loss": 0.4946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4941519498825073,
|
|
"step": 1125,
|
|
"valid_targets_mean": 28764.2,
|
|
"valid_targets_min": 21373
|
|
},
|
|
{
|
|
"epoch": 1.2059765208110993,
|
|
"grad_norm": 0.7655997856942812,
|
|
"learning_rate": 0.00019844644091214172,
|
|
"loss": 0.4918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49341729283332825,
|
|
"step": 1130,
|
|
"valid_targets_mean": 28565.8,
|
|
"valid_targets_min": 22833
|
|
},
|
|
{
|
|
"epoch": 1.2113127001067236,
|
|
"grad_norm": 0.6939402595770039,
|
|
"learning_rate": 0.00019840528953142032,
|
|
"loss": 0.4919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49373674392700195,
|
|
"step": 1135,
|
|
"valid_targets_mean": 29037.3,
|
|
"valid_targets_min": 21268
|
|
},
|
|
{
|
|
"epoch": 1.216648879402348,
|
|
"grad_norm": 0.8636563092762944,
|
|
"learning_rate": 0.0001983636046127344,
|
|
"loss": 0.4924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4902014136314392,
|
|
"step": 1140,
|
|
"valid_targets_mean": 28870.8,
|
|
"valid_targets_min": 19107
|
|
},
|
|
{
|
|
"epoch": 1.2219850586979724,
|
|
"grad_norm": 0.8594445582468951,
|
|
"learning_rate": 0.00019832138638209296,
|
|
"loss": 0.4895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4929233491420746,
|
|
"step": 1145,
|
|
"valid_targets_mean": 28872.9,
|
|
"valid_targets_min": 20602
|
|
},
|
|
{
|
|
"epoch": 1.2273212379935965,
|
|
"grad_norm": 0.7297644042655806,
|
|
"learning_rate": 0.00019827863506839666,
|
|
"loss": 0.4918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4909535050392151,
|
|
"step": 1150,
|
|
"valid_targets_mean": 28989.2,
|
|
"valid_targets_min": 21947
|
|
},
|
|
{
|
|
"epoch": 1.2326574172892208,
|
|
"grad_norm": 0.765568627312834,
|
|
"learning_rate": 0.00019823535090343636,
|
|
"loss": 0.4906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48845890164375305,
|
|
"step": 1155,
|
|
"valid_targets_mean": 28913.0,
|
|
"valid_targets_min": 20358
|
|
},
|
|
{
|
|
"epoch": 1.2379935965848452,
|
|
"grad_norm": 0.7025418654218685,
|
|
"learning_rate": 0.00019819153412189194,
|
|
"loss": 0.4877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48771578073501587,
|
|
"step": 1160,
|
|
"valid_targets_mean": 28797.3,
|
|
"valid_targets_min": 22345
|
|
},
|
|
{
|
|
"epoch": 1.2433297758804696,
|
|
"grad_norm": 0.5607803160935012,
|
|
"learning_rate": 0.00019814718496133108,
|
|
"loss": 0.4894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4898512363433838,
|
|
"step": 1165,
|
|
"valid_targets_mean": 28758.1,
|
|
"valid_targets_min": 17031
|
|
},
|
|
{
|
|
"epoch": 1.248665955176094,
|
|
"grad_norm": 0.9373306103102206,
|
|
"learning_rate": 0.00019810230366220803,
|
|
"loss": 0.4889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49357637763023376,
|
|
"step": 1170,
|
|
"valid_targets_mean": 28874.6,
|
|
"valid_targets_min": 22060
|
|
},
|
|
{
|
|
"epoch": 1.2540021344717183,
|
|
"grad_norm": 0.893301966708018,
|
|
"learning_rate": 0.00019805689046786202,
|
|
"loss": 0.4893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4874823987483978,
|
|
"step": 1175,
|
|
"valid_targets_mean": 28821.3,
|
|
"valid_targets_min": 16692
|
|
},
|
|
{
|
|
"epoch": 1.2593383137673426,
|
|
"grad_norm": 0.6848632438681702,
|
|
"learning_rate": 0.00019801094562451628,
|
|
"loss": 0.4904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49153780937194824,
|
|
"step": 1180,
|
|
"valid_targets_mean": 28801.0,
|
|
"valid_targets_min": 22226
|
|
},
|
|
{
|
|
"epoch": 1.264674493062967,
|
|
"grad_norm": 0.9103918804063192,
|
|
"learning_rate": 0.00019796446938127646,
|
|
"loss": 0.4893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4879952073097229,
|
|
"step": 1185,
|
|
"valid_targets_mean": 28990.6,
|
|
"valid_targets_min": 23411
|
|
},
|
|
{
|
|
"epoch": 1.2700106723585913,
|
|
"grad_norm": 0.7724803021110822,
|
|
"learning_rate": 0.00019791746199012948,
|
|
"loss": 0.49,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4941154718399048,
|
|
"step": 1190,
|
|
"valid_targets_mean": 28802.4,
|
|
"valid_targets_min": 22473
|
|
},
|
|
{
|
|
"epoch": 1.2753468516542155,
|
|
"grad_norm": 0.7173208410735338,
|
|
"learning_rate": 0.00019786992370594196,
|
|
"loss": 0.4868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4847111701965332,
|
|
"step": 1195,
|
|
"valid_targets_mean": 28934.8,
|
|
"valid_targets_min": 22545
|
|
},
|
|
{
|
|
"epoch": 1.2806830309498398,
|
|
"grad_norm": 0.9767771194720062,
|
|
"learning_rate": 0.00019782185478645902,
|
|
"loss": 0.4884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49301740527153015,
|
|
"step": 1200,
|
|
"valid_targets_mean": 28512.9,
|
|
"valid_targets_min": 21252
|
|
},
|
|
{
|
|
"epoch": 1.2860192102454642,
|
|
"grad_norm": 1.0781130531596468,
|
|
"learning_rate": 0.00019777325549230273,
|
|
"loss": 0.4878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49377936124801636,
|
|
"step": 1205,
|
|
"valid_targets_mean": 28668.4,
|
|
"valid_targets_min": 18606
|
|
},
|
|
{
|
|
"epoch": 1.2913553895410885,
|
|
"grad_norm": 0.8771564583626676,
|
|
"learning_rate": 0.00019772412608697079,
|
|
"loss": 0.4917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49229565262794495,
|
|
"step": 1210,
|
|
"valid_targets_mean": 28918.4,
|
|
"valid_targets_min": 21963
|
|
},
|
|
{
|
|
"epoch": 1.2966915688367129,
|
|
"grad_norm": 0.6869349410938043,
|
|
"learning_rate": 0.00019767446683683514,
|
|
"loss": 0.4907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4881063401699066,
|
|
"step": 1215,
|
|
"valid_targets_mean": 28948.3,
|
|
"valid_targets_min": 23020
|
|
},
|
|
{
|
|
"epoch": 1.3020277481323372,
|
|
"grad_norm": 0.6921187884299926,
|
|
"learning_rate": 0.00019762427801114037,
|
|
"loss": 0.4883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4854842722415924,
|
|
"step": 1220,
|
|
"valid_targets_mean": 28842.7,
|
|
"valid_targets_min": 19872
|
|
},
|
|
{
|
|
"epoch": 1.3073639274279616,
|
|
"grad_norm": 0.6841406169395504,
|
|
"learning_rate": 0.00019757355988200237,
|
|
"loss": 0.4874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.489562451839447,
|
|
"step": 1225,
|
|
"valid_targets_mean": 28766.4,
|
|
"valid_targets_min": 17167
|
|
},
|
|
{
|
|
"epoch": 1.312700106723586,
|
|
"grad_norm": 0.6106888444494654,
|
|
"learning_rate": 0.00019752231272440683,
|
|
"loss": 0.4863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4828183054924011,
|
|
"step": 1230,
|
|
"valid_targets_mean": 28774.1,
|
|
"valid_targets_min": 19426
|
|
},
|
|
{
|
|
"epoch": 1.3180362860192103,
|
|
"grad_norm": 0.5257233425597112,
|
|
"learning_rate": 0.00019747053681620785,
|
|
"loss": 0.4868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48759758472442627,
|
|
"step": 1235,
|
|
"valid_targets_mean": 28881.2,
|
|
"valid_targets_min": 23692
|
|
},
|
|
{
|
|
"epoch": 1.3233724653148347,
|
|
"grad_norm": 0.8480441282452694,
|
|
"learning_rate": 0.00019741823243812613,
|
|
"loss": 0.4885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48881375789642334,
|
|
"step": 1240,
|
|
"valid_targets_mean": 28882.3,
|
|
"valid_targets_min": 18555
|
|
},
|
|
{
|
|
"epoch": 1.328708644610459,
|
|
"grad_norm": 0.62182352123255,
|
|
"learning_rate": 0.0001973653998737478,
|
|
"loss": 0.4872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4828396439552307,
|
|
"step": 1245,
|
|
"valid_targets_mean": 28891.3,
|
|
"valid_targets_min": 21120
|
|
},
|
|
{
|
|
"epoch": 1.3340448239060834,
|
|
"grad_norm": 0.8004417732977367,
|
|
"learning_rate": 0.0001973120394095227,
|
|
"loss": 0.4843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4856739044189453,
|
|
"step": 1250,
|
|
"valid_targets_mean": 28789.2,
|
|
"valid_targets_min": 17800
|
|
},
|
|
{
|
|
"epoch": 1.3393810032017075,
|
|
"grad_norm": 0.7440408894557307,
|
|
"learning_rate": 0.00019725815133476288,
|
|
"loss": 0.4849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4856306314468384,
|
|
"step": 1255,
|
|
"valid_targets_mean": 28959.5,
|
|
"valid_targets_min": 22814
|
|
},
|
|
{
|
|
"epoch": 1.3447171824973319,
|
|
"grad_norm": 0.8407471815003433,
|
|
"learning_rate": 0.00019720373594164095,
|
|
"loss": 0.4854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48332279920578003,
|
|
"step": 1260,
|
|
"valid_targets_mean": 28746.5,
|
|
"valid_targets_min": 21515
|
|
},
|
|
{
|
|
"epoch": 1.3500533617929562,
|
|
"grad_norm": 0.5810747016865329,
|
|
"learning_rate": 0.00019714879352518858,
|
|
"loss": 0.4865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4885227680206299,
|
|
"step": 1265,
|
|
"valid_targets_mean": 28916.6,
|
|
"valid_targets_min": 22001
|
|
},
|
|
{
|
|
"epoch": 1.3553895410885806,
|
|
"grad_norm": 0.7600897840310242,
|
|
"learning_rate": 0.0001970933243832949,
|
|
"loss": 0.4859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4816964268684387,
|
|
"step": 1270,
|
|
"valid_targets_mean": 28778.2,
|
|
"valid_targets_min": 18530
|
|
},
|
|
{
|
|
"epoch": 1.360725720384205,
|
|
"grad_norm": 0.7625847143527156,
|
|
"learning_rate": 0.00019703732881670488,
|
|
"loss": 0.4847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4828633666038513,
|
|
"step": 1275,
|
|
"valid_targets_mean": 28908.7,
|
|
"valid_targets_min": 21741
|
|
},
|
|
{
|
|
"epoch": 1.3660618996798293,
|
|
"grad_norm": 0.7707967457925649,
|
|
"learning_rate": 0.00019698080712901765,
|
|
"loss": 0.4846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48737090826034546,
|
|
"step": 1280,
|
|
"valid_targets_mean": 28797.5,
|
|
"valid_targets_min": 20272
|
|
},
|
|
{
|
|
"epoch": 1.3713980789754536,
|
|
"grad_norm": 0.7118713515121713,
|
|
"learning_rate": 0.00019692375962668485,
|
|
"loss": 0.4835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4837408661842346,
|
|
"step": 1285,
|
|
"valid_targets_mean": 28756.1,
|
|
"valid_targets_min": 18040
|
|
},
|
|
{
|
|
"epoch": 1.376734258271078,
|
|
"grad_norm": 0.8007734251793294,
|
|
"learning_rate": 0.0001968661866190091,
|
|
"loss": 0.4848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.486257404088974,
|
|
"step": 1290,
|
|
"valid_targets_mean": 29051.0,
|
|
"valid_targets_min": 22393
|
|
},
|
|
{
|
|
"epoch": 1.3820704375667021,
|
|
"grad_norm": 0.707338396707381,
|
|
"learning_rate": 0.00019680808841814206,
|
|
"loss": 0.4823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4817184805870056,
|
|
"step": 1295,
|
|
"valid_targets_mean": 28809.5,
|
|
"valid_targets_min": 20321
|
|
},
|
|
{
|
|
"epoch": 1.3874066168623265,
|
|
"grad_norm": 0.6311339270049728,
|
|
"learning_rate": 0.00019674946533908315,
|
|
"loss": 0.482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4838714003562927,
|
|
"step": 1300,
|
|
"valid_targets_mean": 28615.7,
|
|
"valid_targets_min": 19155
|
|
},
|
|
{
|
|
"epoch": 1.3927427961579508,
|
|
"grad_norm": 0.7947663440225755,
|
|
"learning_rate": 0.0001966903176996774,
|
|
"loss": 0.4819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4826429486274719,
|
|
"step": 1305,
|
|
"valid_targets_mean": 28764.1,
|
|
"valid_targets_min": 19535
|
|
},
|
|
{
|
|
"epoch": 1.3980789754535752,
|
|
"grad_norm": 0.5399171640364682,
|
|
"learning_rate": 0.00019663064582061397,
|
|
"loss": 0.4828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48236802220344543,
|
|
"step": 1310,
|
|
"valid_targets_mean": 28867.7,
|
|
"valid_targets_min": 21662
|
|
},
|
|
{
|
|
"epoch": 1.4034151547491995,
|
|
"grad_norm": 0.7661910850810453,
|
|
"learning_rate": 0.00019657045002542442,
|
|
"loss": 0.4829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48537033796310425,
|
|
"step": 1315,
|
|
"valid_targets_mean": 28852.9,
|
|
"valid_targets_min": 18125
|
|
},
|
|
{
|
|
"epoch": 1.4087513340448239,
|
|
"grad_norm": 0.8066822149152647,
|
|
"learning_rate": 0.00019650973064048083,
|
|
"loss": 0.4808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48364531993865967,
|
|
"step": 1320,
|
|
"valid_targets_mean": 28965.1,
|
|
"valid_targets_min": 18255
|
|
},
|
|
{
|
|
"epoch": 1.4140875133404482,
|
|
"grad_norm": 0.6847873777978073,
|
|
"learning_rate": 0.00019644848799499413,
|
|
"loss": 0.4826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47989529371261597,
|
|
"step": 1325,
|
|
"valid_targets_mean": 28791.4,
|
|
"valid_targets_min": 17820
|
|
},
|
|
{
|
|
"epoch": 1.4194236926360726,
|
|
"grad_norm": 0.5872084117746706,
|
|
"learning_rate": 0.0001963867224210123,
|
|
"loss": 0.4823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48351019620895386,
|
|
"step": 1330,
|
|
"valid_targets_mean": 28846.1,
|
|
"valid_targets_min": 17670
|
|
},
|
|
{
|
|
"epoch": 1.424759871931697,
|
|
"grad_norm": 0.6656519836568731,
|
|
"learning_rate": 0.00019632443425341854,
|
|
"loss": 0.4831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.480230450630188,
|
|
"step": 1335,
|
|
"valid_targets_mean": 28764.0,
|
|
"valid_targets_min": 21322
|
|
},
|
|
{
|
|
"epoch": 1.4300960512273213,
|
|
"grad_norm": 0.588635393865431,
|
|
"learning_rate": 0.0001962616238299295,
|
|
"loss": 0.4806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4768039584159851,
|
|
"step": 1340,
|
|
"valid_targets_mean": 28781.9,
|
|
"valid_targets_min": 20991
|
|
},
|
|
{
|
|
"epoch": 1.4354322305229457,
|
|
"grad_norm": 0.6246083119855775,
|
|
"learning_rate": 0.00019619829149109336,
|
|
"loss": 0.4803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4811418652534485,
|
|
"step": 1345,
|
|
"valid_targets_mean": 28772.0,
|
|
"valid_targets_min": 17170
|
|
},
|
|
{
|
|
"epoch": 1.44076840981857,
|
|
"grad_norm": 0.7263321693572669,
|
|
"learning_rate": 0.0001961344375802881,
|
|
"loss": 0.4804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4802713990211487,
|
|
"step": 1350,
|
|
"valid_targets_mean": 28746.8,
|
|
"valid_targets_min": 19617
|
|
},
|
|
{
|
|
"epoch": 1.4461045891141944,
|
|
"grad_norm": 0.5996496688229794,
|
|
"learning_rate": 0.0001960700624437195,
|
|
"loss": 0.4798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47946688532829285,
|
|
"step": 1355,
|
|
"valid_targets_mean": 28772.0,
|
|
"valid_targets_min": 17857
|
|
},
|
|
{
|
|
"epoch": 1.4514407684098185,
|
|
"grad_norm": 0.5712796829856135,
|
|
"learning_rate": 0.00019600516643041943,
|
|
"loss": 0.4808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47957509756088257,
|
|
"step": 1360,
|
|
"valid_targets_mean": 28883.8,
|
|
"valid_targets_min": 19086
|
|
},
|
|
{
|
|
"epoch": 1.4567769477054429,
|
|
"grad_norm": 0.664555445537064,
|
|
"learning_rate": 0.0001959397498922439,
|
|
"loss": 0.4804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4794684052467346,
|
|
"step": 1365,
|
|
"valid_targets_mean": 28850.4,
|
|
"valid_targets_min": 18771
|
|
},
|
|
{
|
|
"epoch": 1.4621131270010672,
|
|
"grad_norm": 0.7318862747813851,
|
|
"learning_rate": 0.00019587381318387102,
|
|
"loss": 0.4767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4774578809738159,
|
|
"step": 1370,
|
|
"valid_targets_mean": 28921.8,
|
|
"valid_targets_min": 20968
|
|
},
|
|
{
|
|
"epoch": 1.4674493062966916,
|
|
"grad_norm": 0.7833483127423916,
|
|
"learning_rate": 0.0001958073566627992,
|
|
"loss": 0.4809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48020145297050476,
|
|
"step": 1375,
|
|
"valid_targets_mean": 28826.8,
|
|
"valid_targets_min": 20987
|
|
},
|
|
{
|
|
"epoch": 1.472785485592316,
|
|
"grad_norm": 0.8202494693452496,
|
|
"learning_rate": 0.00019574038068934525,
|
|
"loss": 0.4804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4827878773212433,
|
|
"step": 1380,
|
|
"valid_targets_mean": 28953.8,
|
|
"valid_targets_min": 22259
|
|
},
|
|
{
|
|
"epoch": 1.4781216648879403,
|
|
"grad_norm": 0.6624008136251276,
|
|
"learning_rate": 0.00019567288562664237,
|
|
"loss": 0.4818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4783748984336853,
|
|
"step": 1385,
|
|
"valid_targets_mean": 28843.0,
|
|
"valid_targets_min": 23178
|
|
},
|
|
{
|
|
"epoch": 1.4834578441835646,
|
|
"grad_norm": 0.7775036747710056,
|
|
"learning_rate": 0.00019560487184063806,
|
|
"loss": 0.4797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4827321767807007,
|
|
"step": 1390,
|
|
"valid_targets_mean": 29032.0,
|
|
"valid_targets_min": 22813
|
|
},
|
|
{
|
|
"epoch": 1.4887940234791888,
|
|
"grad_norm": 0.5649901315320555,
|
|
"learning_rate": 0.00019553633970009244,
|
|
"loss": 0.4802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48029249906539917,
|
|
"step": 1395,
|
|
"valid_targets_mean": 28964.1,
|
|
"valid_targets_min": 16470
|
|
},
|
|
{
|
|
"epoch": 1.4941302027748131,
|
|
"grad_norm": 0.5629363194657292,
|
|
"learning_rate": 0.00019546728957657588,
|
|
"loss": 0.4765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47325965762138367,
|
|
"step": 1400,
|
|
"valid_targets_mean": 29003.1,
|
|
"valid_targets_min": 23190
|
|
},
|
|
{
|
|
"epoch": 1.4994663820704375,
|
|
"grad_norm": 0.566425477316844,
|
|
"learning_rate": 0.00019539772184446735,
|
|
"loss": 0.4797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48466968536376953,
|
|
"step": 1405,
|
|
"valid_targets_mean": 28943.6,
|
|
"valid_targets_min": 18278
|
|
},
|
|
{
|
|
"epoch": 1.5048025613660618,
|
|
"grad_norm": 0.6745605040297462,
|
|
"learning_rate": 0.00019532763688095208,
|
|
"loss": 0.4772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4795975387096405,
|
|
"step": 1410,
|
|
"valid_targets_mean": 28871.0,
|
|
"valid_targets_min": 21906
|
|
},
|
|
{
|
|
"epoch": 1.5101387406616862,
|
|
"grad_norm": 0.7288603899967852,
|
|
"learning_rate": 0.0001952570350660197,
|
|
"loss": 0.4783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47614964842796326,
|
|
"step": 1415,
|
|
"valid_targets_mean": 28924.8,
|
|
"valid_targets_min": 16678
|
|
},
|
|
{
|
|
"epoch": 1.5154749199573105,
|
|
"grad_norm": 0.5587678214283813,
|
|
"learning_rate": 0.0001951859167824621,
|
|
"loss": 0.4796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47375237941741943,
|
|
"step": 1420,
|
|
"valid_targets_mean": 28987.3,
|
|
"valid_targets_min": 20241
|
|
},
|
|
{
|
|
"epoch": 1.520811099252935,
|
|
"grad_norm": 0.7688845619007809,
|
|
"learning_rate": 0.00019511428241587143,
|
|
"loss": 0.48,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4781056046485901,
|
|
"step": 1425,
|
|
"valid_targets_mean": 28988.3,
|
|
"valid_targets_min": 22679
|
|
},
|
|
{
|
|
"epoch": 1.5261472785485592,
|
|
"grad_norm": 0.5956710807064683,
|
|
"learning_rate": 0.00019504213235463792,
|
|
"loss": 0.4761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4784397482872009,
|
|
"step": 1430,
|
|
"valid_targets_mean": 28886.1,
|
|
"valid_targets_min": 22293
|
|
},
|
|
{
|
|
"epoch": 1.5314834578441836,
|
|
"grad_norm": 0.6641366454166832,
|
|
"learning_rate": 0.0001949694669899478,
|
|
"loss": 0.478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48540934920310974,
|
|
"step": 1435,
|
|
"valid_targets_mean": 28571.1,
|
|
"valid_targets_min": 21283
|
|
},
|
|
{
|
|
"epoch": 1.536819637139808,
|
|
"grad_norm": 0.7380686201977426,
|
|
"learning_rate": 0.00019489628671578126,
|
|
"loss": 0.478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47933629155158997,
|
|
"step": 1440,
|
|
"valid_targets_mean": 28792.6,
|
|
"valid_targets_min": 23082
|
|
},
|
|
{
|
|
"epoch": 1.5421558164354323,
|
|
"grad_norm": 0.762501599432968,
|
|
"learning_rate": 0.00019482259192891017,
|
|
"loss": 0.476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4763944149017334,
|
|
"step": 1445,
|
|
"valid_targets_mean": 28894.4,
|
|
"valid_targets_min": 19179
|
|
},
|
|
{
|
|
"epoch": 1.5474919957310567,
|
|
"grad_norm": 0.7870382961782788,
|
|
"learning_rate": 0.00019474838302889608,
|
|
"loss": 0.4753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4766830801963806,
|
|
"step": 1450,
|
|
"valid_targets_mean": 28894.8,
|
|
"valid_targets_min": 17014
|
|
},
|
|
{
|
|
"epoch": 1.552828175026681,
|
|
"grad_norm": 0.55480704737329,
|
|
"learning_rate": 0.00019467366041808797,
|
|
"loss": 0.4767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4800575375556946,
|
|
"step": 1455,
|
|
"valid_targets_mean": 28733.5,
|
|
"valid_targets_min": 22665
|
|
},
|
|
{
|
|
"epoch": 1.5581643543223054,
|
|
"grad_norm": 0.6590395456518542,
|
|
"learning_rate": 0.00019459842450161998,
|
|
"loss": 0.4783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4765167832374573,
|
|
"step": 1460,
|
|
"valid_targets_mean": 28797.2,
|
|
"valid_targets_min": 21481
|
|
},
|
|
{
|
|
"epoch": 1.5635005336179295,
|
|
"grad_norm": 0.644591706970926,
|
|
"learning_rate": 0.00019452267568740946,
|
|
"loss": 0.4758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47362422943115234,
|
|
"step": 1465,
|
|
"valid_targets_mean": 28885.0,
|
|
"valid_targets_min": 18996
|
|
},
|
|
{
|
|
"epoch": 1.5688367129135539,
|
|
"grad_norm": 0.641390008912236,
|
|
"learning_rate": 0.00019444641438615446,
|
|
"loss": 0.4767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4772486984729767,
|
|
"step": 1470,
|
|
"valid_targets_mean": 28653.0,
|
|
"valid_targets_min": 20800
|
|
},
|
|
{
|
|
"epoch": 1.5741728922091782,
|
|
"grad_norm": 0.5965323246358682,
|
|
"learning_rate": 0.00019436964101133178,
|
|
"loss": 0.4748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47263336181640625,
|
|
"step": 1475,
|
|
"valid_targets_mean": 28958.3,
|
|
"valid_targets_min": 21498
|
|
},
|
|
{
|
|
"epoch": 1.5795090715048026,
|
|
"grad_norm": 0.6746101269241358,
|
|
"learning_rate": 0.00019429235597919457,
|
|
"loss": 0.4743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46941155195236206,
|
|
"step": 1480,
|
|
"valid_targets_mean": 28767.7,
|
|
"valid_targets_min": 19146
|
|
},
|
|
{
|
|
"epoch": 1.584845250800427,
|
|
"grad_norm": 0.7539203708628796,
|
|
"learning_rate": 0.00019421455970877006,
|
|
"loss": 0.4748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4756488800048828,
|
|
"step": 1485,
|
|
"valid_targets_mean": 28706.9,
|
|
"valid_targets_min": 18866
|
|
},
|
|
{
|
|
"epoch": 1.590181430096051,
|
|
"grad_norm": 0.5581858891979666,
|
|
"learning_rate": 0.00019413625262185735,
|
|
"loss": 0.4772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47360649704933167,
|
|
"step": 1490,
|
|
"valid_targets_mean": 28828.8,
|
|
"valid_targets_min": 22508
|
|
},
|
|
{
|
|
"epoch": 1.5955176093916754,
|
|
"grad_norm": 1.1893212398925357,
|
|
"learning_rate": 0.00019405743514302516,
|
|
"loss": 0.4771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48227018117904663,
|
|
"step": 1495,
|
|
"valid_targets_mean": 28723.0,
|
|
"valid_targets_min": 16706
|
|
},
|
|
{
|
|
"epoch": 1.6008537886872998,
|
|
"grad_norm": 0.7594155355659317,
|
|
"learning_rate": 0.0001939781076996094,
|
|
"loss": 0.4758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47334688901901245,
|
|
"step": 1500,
|
|
"valid_targets_mean": 29004.8,
|
|
"valid_targets_min": 23444
|
|
},
|
|
{
|
|
"epoch": 1.6061899679829241,
|
|
"grad_norm": 0.6558435883064311,
|
|
"learning_rate": 0.00019389827072171096,
|
|
"loss": 0.4778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48009437322616577,
|
|
"step": 1505,
|
|
"valid_targets_mean": 28780.6,
|
|
"valid_targets_min": 20252
|
|
},
|
|
{
|
|
"epoch": 1.6115261472785485,
|
|
"grad_norm": 0.6462809800883502,
|
|
"learning_rate": 0.0001938179246421934,
|
|
"loss": 0.477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4776078760623932,
|
|
"step": 1510,
|
|
"valid_targets_mean": 29037.6,
|
|
"valid_targets_min": 19704
|
|
},
|
|
{
|
|
"epoch": 1.6168623265741728,
|
|
"grad_norm": 0.7619357108206934,
|
|
"learning_rate": 0.0001937370698966804,
|
|
"loss": 0.4749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4755183458328247,
|
|
"step": 1515,
|
|
"valid_targets_mean": 28866.1,
|
|
"valid_targets_min": 20479
|
|
},
|
|
{
|
|
"epoch": 1.6221985058697972,
|
|
"grad_norm": 0.6718256470924716,
|
|
"learning_rate": 0.00019365570692355373,
|
|
"loss": 0.476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47640013694763184,
|
|
"step": 1520,
|
|
"valid_targets_mean": 28887.7,
|
|
"valid_targets_min": 21952
|
|
},
|
|
{
|
|
"epoch": 1.6275346851654215,
|
|
"grad_norm": 0.5766744008183909,
|
|
"learning_rate": 0.00019357383616395055,
|
|
"loss": 0.4738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4738202691078186,
|
|
"step": 1525,
|
|
"valid_targets_mean": 28713.7,
|
|
"valid_targets_min": 20286
|
|
},
|
|
{
|
|
"epoch": 1.632870864461046,
|
|
"grad_norm": 0.5779758000586792,
|
|
"learning_rate": 0.00019349145806176125,
|
|
"loss": 0.4749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4797191023826599,
|
|
"step": 1530,
|
|
"valid_targets_mean": 28577.1,
|
|
"valid_targets_min": 20501
|
|
},
|
|
{
|
|
"epoch": 1.6382070437566703,
|
|
"grad_norm": 0.6742422191418099,
|
|
"learning_rate": 0.00019340857306362685,
|
|
"loss": 0.4736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47305184602737427,
|
|
"step": 1535,
|
|
"valid_targets_mean": 28852.1,
|
|
"valid_targets_min": 22994
|
|
},
|
|
{
|
|
"epoch": 1.6435432230522946,
|
|
"grad_norm": 0.588034899559399,
|
|
"learning_rate": 0.00019332518161893682,
|
|
"loss": 0.4747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47961777448654175,
|
|
"step": 1540,
|
|
"valid_targets_mean": 28754.7,
|
|
"valid_targets_min": 18503
|
|
},
|
|
{
|
|
"epoch": 1.648879402347919,
|
|
"grad_norm": 0.7222210515016969,
|
|
"learning_rate": 0.00019324128417982637,
|
|
"loss": 0.4734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4739217162132263,
|
|
"step": 1545,
|
|
"valid_targets_mean": 28845.9,
|
|
"valid_targets_min": 22369
|
|
},
|
|
{
|
|
"epoch": 1.6542155816435433,
|
|
"grad_norm": 0.7127517622189531,
|
|
"learning_rate": 0.0001931568812011742,
|
|
"loss": 0.4729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47446611523628235,
|
|
"step": 1550,
|
|
"valid_targets_mean": 28677.2,
|
|
"valid_targets_min": 18112
|
|
},
|
|
{
|
|
"epoch": 1.6595517609391677,
|
|
"grad_norm": 0.6484279198393836,
|
|
"learning_rate": 0.00019307197314059996,
|
|
"loss": 0.4741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4715951383113861,
|
|
"step": 1555,
|
|
"valid_targets_mean": 28785.7,
|
|
"valid_targets_min": 19884
|
|
},
|
|
{
|
|
"epoch": 1.664887940234792,
|
|
"grad_norm": 0.6384503419821512,
|
|
"learning_rate": 0.00019298656045846176,
|
|
"loss": 0.475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47511929273605347,
|
|
"step": 1560,
|
|
"valid_targets_mean": 28813.4,
|
|
"valid_targets_min": 22326
|
|
},
|
|
{
|
|
"epoch": 1.6702241195304164,
|
|
"grad_norm": 0.4962068369745076,
|
|
"learning_rate": 0.00019290064361785373,
|
|
"loss": 0.4735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4727150499820709,
|
|
"step": 1565,
|
|
"valid_targets_mean": 28866.5,
|
|
"valid_targets_min": 21069
|
|
},
|
|
{
|
|
"epoch": 1.6755602988260405,
|
|
"grad_norm": 0.6404261217666243,
|
|
"learning_rate": 0.00019281422308460336,
|
|
"loss": 0.4733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4757715165615082,
|
|
"step": 1570,
|
|
"valid_targets_mean": 29004.6,
|
|
"valid_targets_min": 23320
|
|
},
|
|
{
|
|
"epoch": 1.6808964781216649,
|
|
"grad_norm": 0.42810653136543797,
|
|
"learning_rate": 0.0001927272993272692,
|
|
"loss": 0.4726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4702845513820648,
|
|
"step": 1575,
|
|
"valid_targets_mean": 28811.0,
|
|
"valid_targets_min": 21827
|
|
},
|
|
{
|
|
"epoch": 1.6862326574172892,
|
|
"grad_norm": 0.5709891360685495,
|
|
"learning_rate": 0.00019263987281713818,
|
|
"loss": 0.4754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47373712062835693,
|
|
"step": 1580,
|
|
"valid_targets_mean": 28732.8,
|
|
"valid_targets_min": 20893
|
|
},
|
|
{
|
|
"epoch": 1.6915688367129136,
|
|
"grad_norm": 0.5834586825152454,
|
|
"learning_rate": 0.00019255194402822298,
|
|
"loss": 0.4722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47453397512435913,
|
|
"step": 1585,
|
|
"valid_targets_mean": 28772.9,
|
|
"valid_targets_min": 21085
|
|
},
|
|
{
|
|
"epoch": 1.696905016008538,
|
|
"grad_norm": 0.6470249725446496,
|
|
"learning_rate": 0.00019246351343725973,
|
|
"loss": 0.4711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.473699688911438,
|
|
"step": 1590,
|
|
"valid_targets_mean": 28835.1,
|
|
"valid_targets_min": 20580
|
|
},
|
|
{
|
|
"epoch": 1.702241195304162,
|
|
"grad_norm": 0.5973925947254088,
|
|
"learning_rate": 0.00019237458152370507,
|
|
"loss": 0.4706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4700833261013031,
|
|
"step": 1595,
|
|
"valid_targets_mean": 28884.6,
|
|
"valid_targets_min": 23013
|
|
},
|
|
{
|
|
"epoch": 1.7075773745997864,
|
|
"grad_norm": 0.5564214773287839,
|
|
"learning_rate": 0.00019228514876973386,
|
|
"loss": 0.4724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47462400794029236,
|
|
"step": 1600,
|
|
"valid_targets_mean": 28914.5,
|
|
"valid_targets_min": 22150
|
|
},
|
|
{
|
|
"epoch": 1.7129135538954108,
|
|
"grad_norm": 0.6833938785925946,
|
|
"learning_rate": 0.00019219521566023637,
|
|
"loss": 0.4703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4728323817253113,
|
|
"step": 1605,
|
|
"valid_targets_mean": 28960.5,
|
|
"valid_targets_min": 20794
|
|
},
|
|
{
|
|
"epoch": 1.7182497331910351,
|
|
"grad_norm": 0.5711738344386339,
|
|
"learning_rate": 0.00019210478268281576,
|
|
"loss": 0.4719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46786385774612427,
|
|
"step": 1610,
|
|
"valid_targets_mean": 28964.2,
|
|
"valid_targets_min": 21862
|
|
},
|
|
{
|
|
"epoch": 1.7235859124866595,
|
|
"grad_norm": 0.7651126866133595,
|
|
"learning_rate": 0.00019201385032778534,
|
|
"loss": 0.4748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47424450516700745,
|
|
"step": 1615,
|
|
"valid_targets_mean": 28854.0,
|
|
"valid_targets_min": 22067
|
|
},
|
|
{
|
|
"epoch": 1.7289220917822838,
|
|
"grad_norm": 0.7989209097175383,
|
|
"learning_rate": 0.00019192241908816602,
|
|
"loss": 0.4708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47150248289108276,
|
|
"step": 1620,
|
|
"valid_targets_mean": 28869.3,
|
|
"valid_targets_min": 20094
|
|
},
|
|
{
|
|
"epoch": 1.7342582710779082,
|
|
"grad_norm": 0.6681718819850506,
|
|
"learning_rate": 0.00019183048945968357,
|
|
"loss": 0.4705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4716719388961792,
|
|
"step": 1625,
|
|
"valid_targets_mean": 28579.5,
|
|
"valid_targets_min": 21541
|
|
},
|
|
{
|
|
"epoch": 1.7395944503735326,
|
|
"grad_norm": 0.5653742514176838,
|
|
"learning_rate": 0.00019173806194076597,
|
|
"loss": 0.4706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46959933638572693,
|
|
"step": 1630,
|
|
"valid_targets_mean": 28752.8,
|
|
"valid_targets_min": 19845
|
|
},
|
|
{
|
|
"epoch": 1.744930629669157,
|
|
"grad_norm": 0.5773186122160526,
|
|
"learning_rate": 0.0001916451370325406,
|
|
"loss": 0.4702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46714383363723755,
|
|
"step": 1635,
|
|
"valid_targets_mean": 29007.2,
|
|
"valid_targets_min": 20858
|
|
},
|
|
{
|
|
"epoch": 1.7502668089647813,
|
|
"grad_norm": 0.46286187641207616,
|
|
"learning_rate": 0.00019155171523883166,
|
|
"loss": 0.4698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46844184398651123,
|
|
"step": 1640,
|
|
"valid_targets_mean": 28928.1,
|
|
"valid_targets_min": 23272
|
|
},
|
|
{
|
|
"epoch": 1.7556029882604056,
|
|
"grad_norm": 0.6254757262158007,
|
|
"learning_rate": 0.00019145779706615745,
|
|
"loss": 0.4705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4707171618938446,
|
|
"step": 1645,
|
|
"valid_targets_mean": 28948.8,
|
|
"valid_targets_min": 21643
|
|
},
|
|
{
|
|
"epoch": 1.76093916755603,
|
|
"grad_norm": 0.6495021677221583,
|
|
"learning_rate": 0.00019136338302372746,
|
|
"loss": 0.4687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46310025453567505,
|
|
"step": 1650,
|
|
"valid_targets_mean": 28816.1,
|
|
"valid_targets_min": 22820
|
|
},
|
|
{
|
|
"epoch": 1.7662753468516543,
|
|
"grad_norm": 0.5538016735142929,
|
|
"learning_rate": 0.0001912684736234397,
|
|
"loss": 0.4684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4689071774482727,
|
|
"step": 1655,
|
|
"valid_targets_mean": 28719.9,
|
|
"valid_targets_min": 16202
|
|
},
|
|
{
|
|
"epoch": 1.7716115261472787,
|
|
"grad_norm": 0.6126692834218251,
|
|
"learning_rate": 0.00019117306937987803,
|
|
"loss": 0.4705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4690462350845337,
|
|
"step": 1660,
|
|
"valid_targets_mean": 28872.4,
|
|
"valid_targets_min": 19231
|
|
},
|
|
{
|
|
"epoch": 1.776947705442903,
|
|
"grad_norm": 0.6015035250124011,
|
|
"learning_rate": 0.00019107717081030918,
|
|
"loss": 0.4681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4632602632045746,
|
|
"step": 1665,
|
|
"valid_targets_mean": 28903.0,
|
|
"valid_targets_min": 20693
|
|
},
|
|
{
|
|
"epoch": 1.7822838847385272,
|
|
"grad_norm": 0.6310827401529582,
|
|
"learning_rate": 0.00019098077843468012,
|
|
"loss": 0.4685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47237634658813477,
|
|
"step": 1670,
|
|
"valid_targets_mean": 28865.4,
|
|
"valid_targets_min": 23113
|
|
},
|
|
{
|
|
"epoch": 1.7876200640341515,
|
|
"grad_norm": 0.6086750154228456,
|
|
"learning_rate": 0.00019088389277561508,
|
|
"loss": 0.469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46990033984184265,
|
|
"step": 1675,
|
|
"valid_targets_mean": 28757.7,
|
|
"valid_targets_min": 17338
|
|
},
|
|
{
|
|
"epoch": 1.7929562433297759,
|
|
"grad_norm": 0.6615357295397684,
|
|
"learning_rate": 0.00019078651435841285,
|
|
"loss": 0.4689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47078636288642883,
|
|
"step": 1680,
|
|
"valid_targets_mean": 28743.6,
|
|
"valid_targets_min": 21722
|
|
},
|
|
{
|
|
"epoch": 1.7982924226254002,
|
|
"grad_norm": 0.456475404398695,
|
|
"learning_rate": 0.0001906886437110438,
|
|
"loss": 0.4671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.465562641620636,
|
|
"step": 1685,
|
|
"valid_targets_mean": 28944.3,
|
|
"valid_targets_min": 22643
|
|
},
|
|
{
|
|
"epoch": 1.8036286019210246,
|
|
"grad_norm": 0.775533033267955,
|
|
"learning_rate": 0.0001905902813641472,
|
|
"loss": 0.4707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47184234857559204,
|
|
"step": 1690,
|
|
"valid_targets_mean": 28815.4,
|
|
"valid_targets_min": 22030
|
|
},
|
|
{
|
|
"epoch": 1.8089647812166487,
|
|
"grad_norm": 0.5641131554384016,
|
|
"learning_rate": 0.00019049142785102817,
|
|
"loss": 0.4692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47210821509361267,
|
|
"step": 1695,
|
|
"valid_targets_mean": 28887.3,
|
|
"valid_targets_min": 22019
|
|
},
|
|
{
|
|
"epoch": 1.814300960512273,
|
|
"grad_norm": 0.5982378230454685,
|
|
"learning_rate": 0.00019039208370765488,
|
|
"loss": 0.4676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4715844988822937,
|
|
"step": 1700,
|
|
"valid_targets_mean": 28786.1,
|
|
"valid_targets_min": 19559
|
|
},
|
|
{
|
|
"epoch": 1.8196371398078974,
|
|
"grad_norm": 0.7369184272659887,
|
|
"learning_rate": 0.00019029224947265561,
|
|
"loss": 0.4676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4698781967163086,
|
|
"step": 1705,
|
|
"valid_targets_mean": 28841.7,
|
|
"valid_targets_min": 18016
|
|
},
|
|
{
|
|
"epoch": 1.8249733191035218,
|
|
"grad_norm": 0.5760257831206574,
|
|
"learning_rate": 0.00019019192568731582,
|
|
"loss": 0.4673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4666833281517029,
|
|
"step": 1710,
|
|
"valid_targets_mean": 28801.2,
|
|
"valid_targets_min": 19500
|
|
},
|
|
{
|
|
"epoch": 1.8303094983991461,
|
|
"grad_norm": 0.6059236454872505,
|
|
"learning_rate": 0.0001900911128955753,
|
|
"loss": 0.4652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.466926634311676,
|
|
"step": 1715,
|
|
"valid_targets_mean": 28977.7,
|
|
"valid_targets_min": 22426
|
|
},
|
|
{
|
|
"epoch": 1.8356456776947705,
|
|
"grad_norm": 0.5727493374356408,
|
|
"learning_rate": 0.0001899898116440251,
|
|
"loss": 0.4654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.468447208404541,
|
|
"step": 1720,
|
|
"valid_targets_mean": 29006.2,
|
|
"valid_targets_min": 17008
|
|
},
|
|
{
|
|
"epoch": 1.8409818569903948,
|
|
"grad_norm": 0.5282364422069715,
|
|
"learning_rate": 0.00018988802248190458,
|
|
"loss": 0.4675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46732309460639954,
|
|
"step": 1725,
|
|
"valid_targets_mean": 28986.0,
|
|
"valid_targets_min": 22210
|
|
},
|
|
{
|
|
"epoch": 1.8463180362860192,
|
|
"grad_norm": 0.5911835364471943,
|
|
"learning_rate": 0.0001897857459610986,
|
|
"loss": 0.4668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4651775360107422,
|
|
"step": 1730,
|
|
"valid_targets_mean": 28794.5,
|
|
"valid_targets_min": 17770
|
|
},
|
|
{
|
|
"epoch": 1.8516542155816436,
|
|
"grad_norm": 0.6068041014668428,
|
|
"learning_rate": 0.0001896829826361343,
|
|
"loss": 0.4687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4736365079879761,
|
|
"step": 1735,
|
|
"valid_targets_mean": 28701.1,
|
|
"valid_targets_min": 20561
|
|
},
|
|
{
|
|
"epoch": 1.856990394877268,
|
|
"grad_norm": 0.6537224970166546,
|
|
"learning_rate": 0.0001895797330641782,
|
|
"loss": 0.4672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46415460109710693,
|
|
"step": 1740,
|
|
"valid_targets_mean": 28752.2,
|
|
"valid_targets_min": 21504
|
|
},
|
|
{
|
|
"epoch": 1.8623265741728923,
|
|
"grad_norm": 0.5992051872356972,
|
|
"learning_rate": 0.00018947599780503324,
|
|
"loss": 0.4661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46628502011299133,
|
|
"step": 1745,
|
|
"valid_targets_mean": 28908.7,
|
|
"valid_targets_min": 22966
|
|
},
|
|
{
|
|
"epoch": 1.8676627534685166,
|
|
"grad_norm": 0.5885838886628874,
|
|
"learning_rate": 0.0001893717774211356,
|
|
"loss": 0.4665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46606913208961487,
|
|
"step": 1750,
|
|
"valid_targets_mean": 29052.6,
|
|
"valid_targets_min": 21763
|
|
},
|
|
{
|
|
"epoch": 1.872998932764141,
|
|
"grad_norm": 0.7446688714076565,
|
|
"learning_rate": 0.0001892670724775518,
|
|
"loss": 0.4658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4657374620437622,
|
|
"step": 1755,
|
|
"valid_targets_mean": 28758.5,
|
|
"valid_targets_min": 21194
|
|
},
|
|
{
|
|
"epoch": 1.8783351120597653,
|
|
"grad_norm": 0.5036359705065248,
|
|
"learning_rate": 0.00018916188354197559,
|
|
"loss": 0.4654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.463339239358902,
|
|
"step": 1760,
|
|
"valid_targets_mean": 28803.6,
|
|
"valid_targets_min": 22932
|
|
},
|
|
{
|
|
"epoch": 1.8836712913553897,
|
|
"grad_norm": 0.5904829761655065,
|
|
"learning_rate": 0.00018905621118472468,
|
|
"loss": 0.4669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4687913954257965,
|
|
"step": 1765,
|
|
"valid_targets_mean": 28912.2,
|
|
"valid_targets_min": 21915
|
|
},
|
|
{
|
|
"epoch": 1.8890074706510138,
|
|
"grad_norm": 0.6758845024263211,
|
|
"learning_rate": 0.000188950055978738,
|
|
"loss": 0.4658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46433591842651367,
|
|
"step": 1770,
|
|
"valid_targets_mean": 28581.5,
|
|
"valid_targets_min": 17814
|
|
},
|
|
{
|
|
"epoch": 1.8943436499466382,
|
|
"grad_norm": 0.6655010490838106,
|
|
"learning_rate": 0.0001888434184995723,
|
|
"loss": 0.4678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47092902660369873,
|
|
"step": 1775,
|
|
"valid_targets_mean": 28813.9,
|
|
"valid_targets_min": 21894
|
|
},
|
|
{
|
|
"epoch": 1.8996798292422625,
|
|
"grad_norm": 0.6653183565853287,
|
|
"learning_rate": 0.00018873629932539921,
|
|
"loss": 0.4646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4640280604362488,
|
|
"step": 1780,
|
|
"valid_targets_mean": 28996.8,
|
|
"valid_targets_min": 22541
|
|
},
|
|
{
|
|
"epoch": 1.9050160085378869,
|
|
"grad_norm": 0.5578215869627684,
|
|
"learning_rate": 0.000188628699037002,
|
|
"loss": 0.4636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4635908007621765,
|
|
"step": 1785,
|
|
"valid_targets_mean": 28689.9,
|
|
"valid_targets_min": 19297
|
|
},
|
|
{
|
|
"epoch": 1.9103521878335112,
|
|
"grad_norm": 0.6785640133567981,
|
|
"learning_rate": 0.00018852061821777248,
|
|
"loss": 0.4647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4673336446285248,
|
|
"step": 1790,
|
|
"valid_targets_mean": 28844.9,
|
|
"valid_targets_min": 22033
|
|
},
|
|
{
|
|
"epoch": 1.9156883671291356,
|
|
"grad_norm": 0.49819218528460424,
|
|
"learning_rate": 0.00018841205745370775,
|
|
"loss": 0.4639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4637882113456726,
|
|
"step": 1795,
|
|
"valid_targets_mean": 28917.5,
|
|
"valid_targets_min": 20684
|
|
},
|
|
{
|
|
"epoch": 1.9210245464247597,
|
|
"grad_norm": 0.5735702051837462,
|
|
"learning_rate": 0.00018830301733340724,
|
|
"loss": 0.4657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4697621464729309,
|
|
"step": 1800,
|
|
"valid_targets_mean": 28733.1,
|
|
"valid_targets_min": 21774
|
|
},
|
|
{
|
|
"epoch": 1.926360725720384,
|
|
"grad_norm": 0.8086775803798638,
|
|
"learning_rate": 0.00018819349844806924,
|
|
"loss": 0.4659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4654462933540344,
|
|
"step": 1805,
|
|
"valid_targets_mean": 28812.4,
|
|
"valid_targets_min": 20848
|
|
},
|
|
{
|
|
"epoch": 1.9316969050160084,
|
|
"grad_norm": 0.763453791015228,
|
|
"learning_rate": 0.0001880835013914879,
|
|
"loss": 0.4619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4645582139492035,
|
|
"step": 1810,
|
|
"valid_targets_mean": 28802.2,
|
|
"valid_targets_min": 17513
|
|
},
|
|
{
|
|
"epoch": 1.9370330843116328,
|
|
"grad_norm": 0.6122778424943308,
|
|
"learning_rate": 0.00018797302676004988,
|
|
"loss": 0.4647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4665798246860504,
|
|
"step": 1815,
|
|
"valid_targets_mean": 28811.3,
|
|
"valid_targets_min": 22032
|
|
},
|
|
{
|
|
"epoch": 1.9423692636072571,
|
|
"grad_norm": 0.7904794382731611,
|
|
"learning_rate": 0.00018786207515273127,
|
|
"loss": 0.4668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46696364879608154,
|
|
"step": 1820,
|
|
"valid_targets_mean": 28671.8,
|
|
"valid_targets_min": 20916
|
|
},
|
|
{
|
|
"epoch": 1.9477054429028815,
|
|
"grad_norm": 0.6363143649888835,
|
|
"learning_rate": 0.00018775064717109415,
|
|
"loss": 0.4669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.465232789516449,
|
|
"step": 1825,
|
|
"valid_targets_mean": 28751.9,
|
|
"valid_targets_min": 21890
|
|
},
|
|
{
|
|
"epoch": 1.9530416221985059,
|
|
"grad_norm": 0.5872648061296175,
|
|
"learning_rate": 0.00018763874341928344,
|
|
"loss": 0.4645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4646969437599182,
|
|
"step": 1830,
|
|
"valid_targets_mean": 29092.1,
|
|
"valid_targets_min": 22338
|
|
},
|
|
{
|
|
"epoch": 1.9583778014941302,
|
|
"grad_norm": 0.5799013901167623,
|
|
"learning_rate": 0.00018752636450402373,
|
|
"loss": 0.4651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4642789959907532,
|
|
"step": 1835,
|
|
"valid_targets_mean": 28882.9,
|
|
"valid_targets_min": 19336
|
|
},
|
|
{
|
|
"epoch": 1.9637139807897546,
|
|
"grad_norm": 0.5552369882137852,
|
|
"learning_rate": 0.0001874135110346157,
|
|
"loss": 0.4637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4652862548828125,
|
|
"step": 1840,
|
|
"valid_targets_mean": 28639.8,
|
|
"valid_targets_min": 18392
|
|
},
|
|
{
|
|
"epoch": 1.969050160085379,
|
|
"grad_norm": 0.5877245255829862,
|
|
"learning_rate": 0.0001873001836229331,
|
|
"loss": 0.4651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4651973247528076,
|
|
"step": 1845,
|
|
"valid_targets_mean": 28827.9,
|
|
"valid_targets_min": 14728
|
|
},
|
|
{
|
|
"epoch": 1.9743863393810033,
|
|
"grad_norm": 0.6124274001415514,
|
|
"learning_rate": 0.00018718638288341933,
|
|
"loss": 0.4639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46294456720352173,
|
|
"step": 1850,
|
|
"valid_targets_mean": 28673.1,
|
|
"valid_targets_min": 19314
|
|
},
|
|
{
|
|
"epoch": 1.9797225186766276,
|
|
"grad_norm": 0.667045599764406,
|
|
"learning_rate": 0.00018707210943308397,
|
|
"loss": 0.4655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46378907561302185,
|
|
"step": 1855,
|
|
"valid_targets_mean": 28827.3,
|
|
"valid_targets_min": 23077
|
|
},
|
|
{
|
|
"epoch": 1.985058697972252,
|
|
"grad_norm": 0.570184918531401,
|
|
"learning_rate": 0.0001869573638914997,
|
|
"loss": 0.4623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45885172486305237,
|
|
"step": 1860,
|
|
"valid_targets_mean": 28967.2,
|
|
"valid_targets_min": 22321
|
|
},
|
|
{
|
|
"epoch": 1.9903948772678763,
|
|
"grad_norm": 0.7245936338973842,
|
|
"learning_rate": 0.00018684214688079875,
|
|
"loss": 0.4646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46353331208229065,
|
|
"step": 1865,
|
|
"valid_targets_mean": 28860.7,
|
|
"valid_targets_min": 22762
|
|
},
|
|
{
|
|
"epoch": 1.9957310565635007,
|
|
"grad_norm": 0.6639169072091015,
|
|
"learning_rate": 0.00018672645902566957,
|
|
"loss": 0.4623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46214547753334045,
|
|
"step": 1870,
|
|
"valid_targets_mean": 28939.7,
|
|
"valid_targets_min": 22612
|
|
},
|
|
{
|
|
"epoch": 2.001067235859125,
|
|
"grad_norm": 0.59933380977441,
|
|
"learning_rate": 0.0001866103009533535,
|
|
"loss": 0.4622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45977073907852173,
|
|
"step": 1875,
|
|
"valid_targets_mean": 28960.8,
|
|
"valid_targets_min": 23033
|
|
},
|
|
{
|
|
"epoch": 2.0064034151547494,
|
|
"grad_norm": 0.5187899325749421,
|
|
"learning_rate": 0.00018649367329364128,
|
|
"loss": 0.4608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4570675194263458,
|
|
"step": 1880,
|
|
"valid_targets_mean": 28719.8,
|
|
"valid_targets_min": 17665
|
|
},
|
|
{
|
|
"epoch": 2.0117395944503733,
|
|
"grad_norm": 0.5001063716442616,
|
|
"learning_rate": 0.00018637657667886965,
|
|
"loss": 0.4607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4625626802444458,
|
|
"step": 1885,
|
|
"valid_targets_mean": 28657.0,
|
|
"valid_targets_min": 17376
|
|
},
|
|
{
|
|
"epoch": 2.0170757737459977,
|
|
"grad_norm": 0.7323901122640898,
|
|
"learning_rate": 0.0001862590117439181,
|
|
"loss": 0.4599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46335369348526,
|
|
"step": 1890,
|
|
"valid_targets_mean": 28777.8,
|
|
"valid_targets_min": 21955
|
|
},
|
|
{
|
|
"epoch": 2.022411953041622,
|
|
"grad_norm": 0.5676814710375527,
|
|
"learning_rate": 0.00018614097912620506,
|
|
"loss": 0.4607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4586028456687927,
|
|
"step": 1895,
|
|
"valid_targets_mean": 28944.1,
|
|
"valid_targets_min": 22091
|
|
},
|
|
{
|
|
"epoch": 2.0277481323372464,
|
|
"grad_norm": 0.6827923425890666,
|
|
"learning_rate": 0.0001860224794656848,
|
|
"loss": 0.4587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45837873220443726,
|
|
"step": 1900,
|
|
"valid_targets_mean": 28785.9,
|
|
"valid_targets_min": 18288
|
|
},
|
|
{
|
|
"epoch": 2.0330843116328707,
|
|
"grad_norm": 0.600890619574247,
|
|
"learning_rate": 0.00018590351340484388,
|
|
"loss": 0.4595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4622270166873932,
|
|
"step": 1905,
|
|
"valid_targets_mean": 29004.6,
|
|
"valid_targets_min": 23067
|
|
},
|
|
{
|
|
"epoch": 2.038420490928495,
|
|
"grad_norm": 0.5654565589530285,
|
|
"learning_rate": 0.0001857840815886974,
|
|
"loss": 0.4583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45639216899871826,
|
|
"step": 1910,
|
|
"valid_targets_mean": 28861.2,
|
|
"valid_targets_min": 19565
|
|
},
|
|
{
|
|
"epoch": 2.0437566702241194,
|
|
"grad_norm": 0.6829281549445924,
|
|
"learning_rate": 0.0001856641846647859,
|
|
"loss": 0.4594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4606631398200989,
|
|
"step": 1915,
|
|
"valid_targets_mean": 28820.4,
|
|
"valid_targets_min": 18753
|
|
},
|
|
{
|
|
"epoch": 2.049092849519744,
|
|
"grad_norm": 0.5400921089118355,
|
|
"learning_rate": 0.00018554382328317163,
|
|
"loss": 0.4599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4611979126930237,
|
|
"step": 1920,
|
|
"valid_targets_mean": 28742.9,
|
|
"valid_targets_min": 22417
|
|
},
|
|
{
|
|
"epoch": 2.054429028815368,
|
|
"grad_norm": 0.6935582624005341,
|
|
"learning_rate": 0.00018542299809643493,
|
|
"loss": 0.4601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4632042348384857,
|
|
"step": 1925,
|
|
"valid_targets_mean": 28712.9,
|
|
"valid_targets_min": 18976
|
|
},
|
|
{
|
|
"epoch": 2.0597652081109925,
|
|
"grad_norm": 0.5813064771146723,
|
|
"learning_rate": 0.00018530170975967098,
|
|
"loss": 0.4604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.457609087228775,
|
|
"step": 1930,
|
|
"valid_targets_mean": 28734.9,
|
|
"valid_targets_min": 18178
|
|
},
|
|
{
|
|
"epoch": 2.065101387406617,
|
|
"grad_norm": 0.6538864203305739,
|
|
"learning_rate": 0.00018517995893048604,
|
|
"loss": 0.4604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46187877655029297,
|
|
"step": 1935,
|
|
"valid_targets_mean": 28943.5,
|
|
"valid_targets_min": 18084
|
|
},
|
|
{
|
|
"epoch": 2.070437566702241,
|
|
"grad_norm": 0.5842141291679069,
|
|
"learning_rate": 0.0001850577462689939,
|
|
"loss": 0.4588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45876073837280273,
|
|
"step": 1940,
|
|
"valid_targets_mean": 28907.3,
|
|
"valid_targets_min": 22624
|
|
},
|
|
{
|
|
"epoch": 2.0757737459978656,
|
|
"grad_norm": 0.6842902569703102,
|
|
"learning_rate": 0.00018493507243781224,
|
|
"loss": 0.4612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4589679539203644,
|
|
"step": 1945,
|
|
"valid_targets_mean": 28795.6,
|
|
"valid_targets_min": 22052
|
|
},
|
|
{
|
|
"epoch": 2.08110992529349,
|
|
"grad_norm": 0.635937259057549,
|
|
"learning_rate": 0.00018481193810205933,
|
|
"loss": 0.4586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.464557409286499,
|
|
"step": 1950,
|
|
"valid_targets_mean": 28863.1,
|
|
"valid_targets_min": 20012
|
|
},
|
|
{
|
|
"epoch": 2.0864461045891143,
|
|
"grad_norm": 0.5144324114257415,
|
|
"learning_rate": 0.00018468834392935016,
|
|
"loss": 0.4576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46036064624786377,
|
|
"step": 1955,
|
|
"valid_targets_mean": 28950.8,
|
|
"valid_targets_min": 22641
|
|
},
|
|
{
|
|
"epoch": 2.0917822838847386,
|
|
"grad_norm": 0.5987241157449255,
|
|
"learning_rate": 0.00018456429058979278,
|
|
"loss": 0.4601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4588262736797333,
|
|
"step": 1960,
|
|
"valid_targets_mean": 28777.1,
|
|
"valid_targets_min": 19931
|
|
},
|
|
{
|
|
"epoch": 2.097118463180363,
|
|
"grad_norm": 0.6866158518403197,
|
|
"learning_rate": 0.0001844397787559848,
|
|
"loss": 0.4574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.457817018032074,
|
|
"step": 1965,
|
|
"valid_targets_mean": 28779.0,
|
|
"valid_targets_min": 19641
|
|
},
|
|
{
|
|
"epoch": 2.1024546424759873,
|
|
"grad_norm": 0.5291979866110622,
|
|
"learning_rate": 0.00018431480910300984,
|
|
"loss": 0.456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45642563700675964,
|
|
"step": 1970,
|
|
"valid_targets_mean": 28720.2,
|
|
"valid_targets_min": 21564
|
|
},
|
|
{
|
|
"epoch": 2.1077908217716117,
|
|
"grad_norm": 0.6023975429951549,
|
|
"learning_rate": 0.00018418938230843358,
|
|
"loss": 0.4578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45973849296569824,
|
|
"step": 1975,
|
|
"valid_targets_mean": 28830.4,
|
|
"valid_targets_min": 21782
|
|
},
|
|
{
|
|
"epoch": 2.113127001067236,
|
|
"grad_norm": 0.5115658188796195,
|
|
"learning_rate": 0.00018406349905230037,
|
|
"loss": 0.4581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4587351083755493,
|
|
"step": 1980,
|
|
"valid_targets_mean": 28822.9,
|
|
"valid_targets_min": 18936
|
|
},
|
|
{
|
|
"epoch": 2.11846318036286,
|
|
"grad_norm": 0.7055463833686328,
|
|
"learning_rate": 0.0001839371600171294,
|
|
"loss": 0.4587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4615404009819031,
|
|
"step": 1985,
|
|
"valid_targets_mean": 28784.8,
|
|
"valid_targets_min": 19860
|
|
},
|
|
{
|
|
"epoch": 2.1237993596584843,
|
|
"grad_norm": 0.6445978890910325,
|
|
"learning_rate": 0.00018381036588791094,
|
|
"loss": 0.4591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46140262484550476,
|
|
"step": 1990,
|
|
"valid_targets_mean": 28812.8,
|
|
"valid_targets_min": 20617
|
|
},
|
|
{
|
|
"epoch": 2.1291355389541087,
|
|
"grad_norm": 0.5642093421556056,
|
|
"learning_rate": 0.0001836831173521028,
|
|
"loss": 0.4587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45539408922195435,
|
|
"step": 1995,
|
|
"valid_targets_mean": 28769.3,
|
|
"valid_targets_min": 23476
|
|
},
|
|
{
|
|
"epoch": 2.134471718249733,
|
|
"grad_norm": 0.5633066242804747,
|
|
"learning_rate": 0.00018355541509962645,
|
|
"loss": 0.4567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4613940715789795,
|
|
"step": 2000,
|
|
"valid_targets_mean": 28864.9,
|
|
"valid_targets_min": 20137
|
|
},
|
|
{
|
|
"epoch": 2.1398078975453574,
|
|
"grad_norm": 0.7423375483745515,
|
|
"learning_rate": 0.00018342725982286342,
|
|
"loss": 0.4576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45607930421829224,
|
|
"step": 2005,
|
|
"valid_targets_mean": 28840.3,
|
|
"valid_targets_min": 19525
|
|
},
|
|
{
|
|
"epoch": 2.1451440768409817,
|
|
"grad_norm": 0.6407532064844419,
|
|
"learning_rate": 0.00018329865221665138,
|
|
"loss": 0.4579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46243995428085327,
|
|
"step": 2010,
|
|
"valid_targets_mean": 28735.4,
|
|
"valid_targets_min": 21423
|
|
},
|
|
{
|
|
"epoch": 2.150480256136606,
|
|
"grad_norm": 0.5071631414893789,
|
|
"learning_rate": 0.0001831695929782805,
|
|
"loss": 0.4571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4553641676902771,
|
|
"step": 2015,
|
|
"valid_targets_mean": 28805.2,
|
|
"valid_targets_min": 21289
|
|
},
|
|
{
|
|
"epoch": 2.1558164354322304,
|
|
"grad_norm": 0.5351424501446503,
|
|
"learning_rate": 0.00018304008280748963,
|
|
"loss": 0.4577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45526012778282166,
|
|
"step": 2020,
|
|
"valid_targets_mean": 28898.0,
|
|
"valid_targets_min": 22334
|
|
},
|
|
{
|
|
"epoch": 2.161152614727855,
|
|
"grad_norm": 0.5832349897737564,
|
|
"learning_rate": 0.00018291012240646246,
|
|
"loss": 0.4588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4605897068977356,
|
|
"step": 2025,
|
|
"valid_targets_mean": 28928.8,
|
|
"valid_targets_min": 22394
|
|
},
|
|
{
|
|
"epoch": 2.166488794023479,
|
|
"grad_norm": 0.6341432814176831,
|
|
"learning_rate": 0.00018277971247982383,
|
|
"loss": 0.4576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45824578404426575,
|
|
"step": 2030,
|
|
"valid_targets_mean": 28848.5,
|
|
"valid_targets_min": 23009
|
|
},
|
|
{
|
|
"epoch": 2.1718249733191035,
|
|
"grad_norm": 0.5618988276078262,
|
|
"learning_rate": 0.0001826488537346358,
|
|
"loss": 0.4549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4593442380428314,
|
|
"step": 2035,
|
|
"valid_targets_mean": 28906.0,
|
|
"valid_targets_min": 22415
|
|
},
|
|
{
|
|
"epoch": 2.177161152614728,
|
|
"grad_norm": 0.5852316676215777,
|
|
"learning_rate": 0.00018251754688039385,
|
|
"loss": 0.4565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45731204748153687,
|
|
"step": 2040,
|
|
"valid_targets_mean": 28754.1,
|
|
"valid_targets_min": 21083
|
|
},
|
|
{
|
|
"epoch": 2.1824973319103522,
|
|
"grad_norm": 0.5470934333926433,
|
|
"learning_rate": 0.00018238579262902301,
|
|
"loss": 0.4566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45632243156433105,
|
|
"step": 2045,
|
|
"valid_targets_mean": 28928.7,
|
|
"valid_targets_min": 19931
|
|
},
|
|
{
|
|
"epoch": 2.1878335112059766,
|
|
"grad_norm": 0.5415715818158156,
|
|
"learning_rate": 0.00018225359169487412,
|
|
"loss": 0.4568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4580104947090149,
|
|
"step": 2050,
|
|
"valid_targets_mean": 28765.1,
|
|
"valid_targets_min": 20572
|
|
},
|
|
{
|
|
"epoch": 2.193169690501601,
|
|
"grad_norm": 0.598303068188444,
|
|
"learning_rate": 0.00018212094479471975,
|
|
"loss": 0.457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45613572001457214,
|
|
"step": 2055,
|
|
"valid_targets_mean": 28840.7,
|
|
"valid_targets_min": 21709
|
|
},
|
|
{
|
|
"epoch": 2.1985058697972253,
|
|
"grad_norm": 0.6913200758318999,
|
|
"learning_rate": 0.0001819878526477505,
|
|
"loss": 0.4567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45595425367355347,
|
|
"step": 2060,
|
|
"valid_targets_mean": 28777.8,
|
|
"valid_targets_min": 22706
|
|
},
|
|
{
|
|
"epoch": 2.2038420490928496,
|
|
"grad_norm": 0.40806618165354064,
|
|
"learning_rate": 0.000181854315975571,
|
|
"loss": 0.4561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4540978968143463,
|
|
"step": 2065,
|
|
"valid_targets_mean": 28891.5,
|
|
"valid_targets_min": 21723
|
|
},
|
|
{
|
|
"epoch": 2.209178228388474,
|
|
"grad_norm": 0.6338950455728084,
|
|
"learning_rate": 0.000181720335502196,
|
|
"loss": 0.4552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45749494433403015,
|
|
"step": 2070,
|
|
"valid_targets_mean": 28764.5,
|
|
"valid_targets_min": 19535
|
|
},
|
|
{
|
|
"epoch": 2.2145144076840984,
|
|
"grad_norm": 0.5615674622004101,
|
|
"learning_rate": 0.0001815859119540466,
|
|
"loss": 0.4561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4547135829925537,
|
|
"step": 2075,
|
|
"valid_targets_mean": 29034.3,
|
|
"valid_targets_min": 20190
|
|
},
|
|
{
|
|
"epoch": 2.2198505869797227,
|
|
"grad_norm": 0.5942171627491899,
|
|
"learning_rate": 0.00018145104605994599,
|
|
"loss": 0.459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45587158203125,
|
|
"step": 2080,
|
|
"valid_targets_mean": 28982.1,
|
|
"valid_targets_min": 23244
|
|
},
|
|
{
|
|
"epoch": 2.225186766275347,
|
|
"grad_norm": 0.5544783734850327,
|
|
"learning_rate": 0.00018131573855111578,
|
|
"loss": 0.4563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45724672079086304,
|
|
"step": 2085,
|
|
"valid_targets_mean": 28879.3,
|
|
"valid_targets_min": 19495
|
|
},
|
|
{
|
|
"epoch": 2.2305229455709714,
|
|
"grad_norm": 0.6646252391429057,
|
|
"learning_rate": 0.00018117999016117203,
|
|
"loss": 0.4556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4570236802101135,
|
|
"step": 2090,
|
|
"valid_targets_mean": 28734.1,
|
|
"valid_targets_min": 19166
|
|
},
|
|
{
|
|
"epoch": 2.2358591248665953,
|
|
"grad_norm": 0.6369862489589629,
|
|
"learning_rate": 0.00018104380162612101,
|
|
"loss": 0.4565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4539600908756256,
|
|
"step": 2095,
|
|
"valid_targets_mean": 28860.5,
|
|
"valid_targets_min": 22225
|
|
},
|
|
{
|
|
"epoch": 2.2411953041622197,
|
|
"grad_norm": 0.6131804501294443,
|
|
"learning_rate": 0.0001809071736843556,
|
|
"loss": 0.4555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4549659490585327,
|
|
"step": 2100,
|
|
"valid_targets_mean": 28904.1,
|
|
"valid_targets_min": 22427
|
|
},
|
|
{
|
|
"epoch": 2.246531483457844,
|
|
"grad_norm": 0.5523653824690307,
|
|
"learning_rate": 0.00018077010707665084,
|
|
"loss": 0.4549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.455466628074646,
|
|
"step": 2105,
|
|
"valid_targets_mean": 28753.7,
|
|
"valid_targets_min": 23155
|
|
},
|
|
{
|
|
"epoch": 2.2518676627534684,
|
|
"grad_norm": 0.5728452825342911,
|
|
"learning_rate": 0.00018063260254616033,
|
|
"loss": 0.4563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4555572271347046,
|
|
"step": 2110,
|
|
"valid_targets_mean": 28900.0,
|
|
"valid_targets_min": 21903
|
|
},
|
|
{
|
|
"epoch": 2.2572038420490927,
|
|
"grad_norm": 0.7136693673051113,
|
|
"learning_rate": 0.00018049466083841194,
|
|
"loss": 0.4558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45720332860946655,
|
|
"step": 2115,
|
|
"valid_targets_mean": 28775.8,
|
|
"valid_targets_min": 22119
|
|
},
|
|
{
|
|
"epoch": 2.262540021344717,
|
|
"grad_norm": 0.4503189370802511,
|
|
"learning_rate": 0.0001803562827013039,
|
|
"loss": 0.4546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45119166374206543,
|
|
"step": 2120,
|
|
"valid_targets_mean": 28674.5,
|
|
"valid_targets_min": 20689
|
|
},
|
|
{
|
|
"epoch": 2.2678762006403415,
|
|
"grad_norm": 0.5722858990868676,
|
|
"learning_rate": 0.00018021746888510057,
|
|
"loss": 0.4576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4569234848022461,
|
|
"step": 2125,
|
|
"valid_targets_mean": 28806.0,
|
|
"valid_targets_min": 20865
|
|
},
|
|
{
|
|
"epoch": 2.273212379935966,
|
|
"grad_norm": 0.5096040586782076,
|
|
"learning_rate": 0.00018007822014242867,
|
|
"loss": 0.4553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4586583971977234,
|
|
"step": 2130,
|
|
"valid_targets_mean": 28771.0,
|
|
"valid_targets_min": 22431
|
|
},
|
|
{
|
|
"epoch": 2.27854855923159,
|
|
"grad_norm": 0.5617317894350805,
|
|
"learning_rate": 0.00017993853722827284,
|
|
"loss": 0.4544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4546637535095215,
|
|
"step": 2135,
|
|
"valid_targets_mean": 28571.8,
|
|
"valid_targets_min": 20067
|
|
},
|
|
{
|
|
"epoch": 2.2838847385272145,
|
|
"grad_norm": 0.5270533340074951,
|
|
"learning_rate": 0.00017979842089997186,
|
|
"loss": 0.4561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.452915757894516,
|
|
"step": 2140,
|
|
"valid_targets_mean": 28877.1,
|
|
"valid_targets_min": 20734
|
|
},
|
|
{
|
|
"epoch": 2.289220917822839,
|
|
"grad_norm": 0.4882137321818793,
|
|
"learning_rate": 0.00017965787191721435,
|
|
"loss": 0.4558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45327115058898926,
|
|
"step": 2145,
|
|
"valid_targets_mean": 28830.8,
|
|
"valid_targets_min": 18590
|
|
},
|
|
{
|
|
"epoch": 2.2945570971184632,
|
|
"grad_norm": 0.6423744476600158,
|
|
"learning_rate": 0.0001795168910420348,
|
|
"loss": 0.4527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4503288269042969,
|
|
"step": 2150,
|
|
"valid_targets_mean": 28775.4,
|
|
"valid_targets_min": 20912
|
|
},
|
|
{
|
|
"epoch": 2.2998932764140876,
|
|
"grad_norm": 0.6490306301093351,
|
|
"learning_rate": 0.00017937547903880917,
|
|
"loss": 0.4544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.455191969871521,
|
|
"step": 2155,
|
|
"valid_targets_mean": 28614.9,
|
|
"valid_targets_min": 22354
|
|
},
|
|
{
|
|
"epoch": 2.305229455709712,
|
|
"grad_norm": 0.5582032512637555,
|
|
"learning_rate": 0.00017923363667425116,
|
|
"loss": 0.4529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45650357007980347,
|
|
"step": 2160,
|
|
"valid_targets_mean": 28915.7,
|
|
"valid_targets_min": 18988
|
|
},
|
|
{
|
|
"epoch": 2.3105656350053363,
|
|
"grad_norm": 0.6009137653662315,
|
|
"learning_rate": 0.00017909136471740765,
|
|
"loss": 0.4552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4560841917991638,
|
|
"step": 2165,
|
|
"valid_targets_mean": 28753.2,
|
|
"valid_targets_min": 21835
|
|
},
|
|
{
|
|
"epoch": 2.3159018143009606,
|
|
"grad_norm": 0.6041106978318558,
|
|
"learning_rate": 0.00017894866393965476,
|
|
"loss": 0.4539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45484206080436707,
|
|
"step": 2170,
|
|
"valid_targets_mean": 28878.6,
|
|
"valid_targets_min": 22043
|
|
},
|
|
{
|
|
"epoch": 2.321237993596585,
|
|
"grad_norm": 0.4272191457749068,
|
|
"learning_rate": 0.0001788055351146936,
|
|
"loss": 0.4543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4543980062007904,
|
|
"step": 2175,
|
|
"valid_targets_mean": 28866.5,
|
|
"valid_targets_min": 21992
|
|
},
|
|
{
|
|
"epoch": 2.3265741728922094,
|
|
"grad_norm": 0.6502688723355644,
|
|
"learning_rate": 0.00017866197901854615,
|
|
"loss": 0.455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45570501685142517,
|
|
"step": 2180,
|
|
"valid_targets_mean": 28962.6,
|
|
"valid_targets_min": 19883
|
|
},
|
|
{
|
|
"epoch": 2.3319103521878333,
|
|
"grad_norm": 0.4024183500508293,
|
|
"learning_rate": 0.00017851799642955088,
|
|
"loss": 0.4568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4559852182865143,
|
|
"step": 2185,
|
|
"valid_targets_mean": 28797.3,
|
|
"valid_targets_min": 22052
|
|
},
|
|
{
|
|
"epoch": 2.3372465314834576,
|
|
"grad_norm": 0.4693015865238889,
|
|
"learning_rate": 0.00017837358812835866,
|
|
"loss": 0.457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4545251131057739,
|
|
"step": 2190,
|
|
"valid_targets_mean": 28810.5,
|
|
"valid_targets_min": 19893
|
|
},
|
|
{
|
|
"epoch": 2.342582710779082,
|
|
"grad_norm": 0.5223725512693111,
|
|
"learning_rate": 0.0001782287548979286,
|
|
"loss": 0.4542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4560278654098511,
|
|
"step": 2195,
|
|
"valid_targets_mean": 28810.0,
|
|
"valid_targets_min": 21589
|
|
},
|
|
{
|
|
"epoch": 2.3479188900747063,
|
|
"grad_norm": 0.5178388414730264,
|
|
"learning_rate": 0.00017808349752352357,
|
|
"loss": 0.4551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4541753828525543,
|
|
"step": 2200,
|
|
"valid_targets_mean": 28903.5,
|
|
"valid_targets_min": 17794
|
|
},
|
|
{
|
|
"epoch": 2.3532550693703307,
|
|
"grad_norm": 0.5484107517474188,
|
|
"learning_rate": 0.0001779378167927062,
|
|
"loss": 0.4553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4518164098262787,
|
|
"step": 2205,
|
|
"valid_targets_mean": 29016.5,
|
|
"valid_targets_min": 22577
|
|
},
|
|
{
|
|
"epoch": 2.358591248665955,
|
|
"grad_norm": 0.5707202376642307,
|
|
"learning_rate": 0.00017779171349533446,
|
|
"loss": 0.4542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.453921377658844,
|
|
"step": 2210,
|
|
"valid_targets_mean": 28946.0,
|
|
"valid_targets_min": 22579
|
|
},
|
|
{
|
|
"epoch": 2.3639274279615794,
|
|
"grad_norm": 0.6507332395005175,
|
|
"learning_rate": 0.0001776451884235573,
|
|
"loss": 0.4542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45342642068862915,
|
|
"step": 2215,
|
|
"valid_targets_mean": 28768.5,
|
|
"valid_targets_min": 19660
|
|
},
|
|
{
|
|
"epoch": 2.3692636072572038,
|
|
"grad_norm": 0.5253254081733058,
|
|
"learning_rate": 0.00017749824237181066,
|
|
"loss": 0.4528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.454387366771698,
|
|
"step": 2220,
|
|
"valid_targets_mean": 28863.5,
|
|
"valid_targets_min": 21233
|
|
},
|
|
{
|
|
"epoch": 2.374599786552828,
|
|
"grad_norm": 0.5749454692936622,
|
|
"learning_rate": 0.00017735087613681284,
|
|
"loss": 0.4535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4516810178756714,
|
|
"step": 2225,
|
|
"valid_targets_mean": 28899.3,
|
|
"valid_targets_min": 21644
|
|
},
|
|
{
|
|
"epoch": 2.3799359658484525,
|
|
"grad_norm": 0.5333442269729424,
|
|
"learning_rate": 0.00017720309051756026,
|
|
"loss": 0.4537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45168444514274597,
|
|
"step": 2230,
|
|
"valid_targets_mean": 28924.2,
|
|
"valid_targets_min": 22883
|
|
},
|
|
{
|
|
"epoch": 2.385272145144077,
|
|
"grad_norm": 0.5804156748991731,
|
|
"learning_rate": 0.00017705488631532333,
|
|
"loss": 0.4532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4527837634086609,
|
|
"step": 2235,
|
|
"valid_targets_mean": 28909.1,
|
|
"valid_targets_min": 21999
|
|
},
|
|
{
|
|
"epoch": 2.390608324439701,
|
|
"grad_norm": 0.5369453418155822,
|
|
"learning_rate": 0.00017690626433364184,
|
|
"loss": 0.4504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4510282874107361,
|
|
"step": 2240,
|
|
"valid_targets_mean": 28867.9,
|
|
"valid_targets_min": 19981
|
|
},
|
|
{
|
|
"epoch": 2.3959445037353255,
|
|
"grad_norm": 0.5695614019290133,
|
|
"learning_rate": 0.00017675722537832073,
|
|
"loss": 0.4545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4558982253074646,
|
|
"step": 2245,
|
|
"valid_targets_mean": 28881.5,
|
|
"valid_targets_min": 21356
|
|
},
|
|
{
|
|
"epoch": 2.40128068303095,
|
|
"grad_norm": 0.44039244965159946,
|
|
"learning_rate": 0.00017660777025742572,
|
|
"loss": 0.4526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45342639088630676,
|
|
"step": 2250,
|
|
"valid_targets_mean": 28929.8,
|
|
"valid_targets_min": 20524
|
|
},
|
|
{
|
|
"epoch": 2.4066168623265742,
|
|
"grad_norm": 0.4152414652406428,
|
|
"learning_rate": 0.00017645789978127893,
|
|
"loss": 0.4519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45057374238967896,
|
|
"step": 2255,
|
|
"valid_targets_mean": 28970.9,
|
|
"valid_targets_min": 20859
|
|
},
|
|
{
|
|
"epoch": 2.4119530416221986,
|
|
"grad_norm": 0.6508404516893584,
|
|
"learning_rate": 0.00017630761476245446,
|
|
"loss": 0.4541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4561172127723694,
|
|
"step": 2260,
|
|
"valid_targets_mean": 28894.7,
|
|
"valid_targets_min": 23451
|
|
},
|
|
{
|
|
"epoch": 2.417289220917823,
|
|
"grad_norm": 0.5170269358755798,
|
|
"learning_rate": 0.00017615691601577393,
|
|
"loss": 0.4538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45423901081085205,
|
|
"step": 2265,
|
|
"valid_targets_mean": 28926.5,
|
|
"valid_targets_min": 21125
|
|
},
|
|
{
|
|
"epoch": 2.4226254002134473,
|
|
"grad_norm": 0.5539052167798649,
|
|
"learning_rate": 0.00017600580435830226,
|
|
"loss": 0.4539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4492901563644409,
|
|
"step": 2270,
|
|
"valid_targets_mean": 29171.7,
|
|
"valid_targets_min": 18853
|
|
},
|
|
{
|
|
"epoch": 2.4279615795090717,
|
|
"grad_norm": 0.5027064755030347,
|
|
"learning_rate": 0.00017585428060934295,
|
|
"loss": 0.4513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44866707921028137,
|
|
"step": 2275,
|
|
"valid_targets_mean": 28943.6,
|
|
"valid_targets_min": 20915
|
|
},
|
|
{
|
|
"epoch": 2.433297758804696,
|
|
"grad_norm": 0.49172724471201934,
|
|
"learning_rate": 0.00017570234559043392,
|
|
"loss": 0.4533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44804149866104126,
|
|
"step": 2280,
|
|
"valid_targets_mean": 28720.4,
|
|
"valid_targets_min": 17376
|
|
},
|
|
{
|
|
"epoch": 2.4386339381003204,
|
|
"grad_norm": 0.5303285183291916,
|
|
"learning_rate": 0.00017555000012534292,
|
|
"loss": 0.4509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4553695321083069,
|
|
"step": 2285,
|
|
"valid_targets_mean": 28826.4,
|
|
"valid_targets_min": 19118
|
|
},
|
|
{
|
|
"epoch": 2.4439701173959447,
|
|
"grad_norm": 0.4350864203473741,
|
|
"learning_rate": 0.000175397245040063,
|
|
"loss": 0.4542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4559476375579834,
|
|
"step": 2290,
|
|
"valid_targets_mean": 28908.8,
|
|
"valid_targets_min": 22187
|
|
},
|
|
{
|
|
"epoch": 2.449306296691569,
|
|
"grad_norm": 0.6127096293789136,
|
|
"learning_rate": 0.00017524408116280813,
|
|
"loss": 0.4533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45513585209846497,
|
|
"step": 2295,
|
|
"valid_targets_mean": 28868.0,
|
|
"valid_targets_min": 15207
|
|
},
|
|
{
|
|
"epoch": 2.454642475987193,
|
|
"grad_norm": 0.5051617324594502,
|
|
"learning_rate": 0.00017509050932400876,
|
|
"loss": 0.4529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4498024880886078,
|
|
"step": 2300,
|
|
"valid_targets_mean": 28820.4,
|
|
"valid_targets_min": 20899
|
|
},
|
|
{
|
|
"epoch": 2.4599786552828173,
|
|
"grad_norm": 0.5619716934988601,
|
|
"learning_rate": 0.00017493653035630715,
|
|
"loss": 0.4529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.451460599899292,
|
|
"step": 2305,
|
|
"valid_targets_mean": 28765.6,
|
|
"valid_targets_min": 20019
|
|
},
|
|
{
|
|
"epoch": 2.4653148345784417,
|
|
"grad_norm": 0.49243931962655774,
|
|
"learning_rate": 0.000174782145094553,
|
|
"loss": 0.4506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4520164132118225,
|
|
"step": 2310,
|
|
"valid_targets_mean": 28843.5,
|
|
"valid_targets_min": 17007
|
|
},
|
|
{
|
|
"epoch": 2.470651013874066,
|
|
"grad_norm": 0.582177272434108,
|
|
"learning_rate": 0.00017462735437579884,
|
|
"loss": 0.4544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4531388580799103,
|
|
"step": 2315,
|
|
"valid_targets_mean": 28881.2,
|
|
"valid_targets_min": 22525
|
|
},
|
|
{
|
|
"epoch": 2.4759871931696904,
|
|
"grad_norm": 0.6116841941244345,
|
|
"learning_rate": 0.00017447215903929552,
|
|
"loss": 0.452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4520295560359955,
|
|
"step": 2320,
|
|
"valid_targets_mean": 28662.7,
|
|
"valid_targets_min": 21514
|
|
},
|
|
{
|
|
"epoch": 2.4813233724653148,
|
|
"grad_norm": 0.5184075672422216,
|
|
"learning_rate": 0.00017431655992648776,
|
|
"loss": 0.452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45252466201782227,
|
|
"step": 2325,
|
|
"valid_targets_mean": 28927.5,
|
|
"valid_targets_min": 23202
|
|
},
|
|
{
|
|
"epoch": 2.486659551760939,
|
|
"grad_norm": 0.6047406574094899,
|
|
"learning_rate": 0.00017416055788100935,
|
|
"loss": 0.4517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45016252994537354,
|
|
"step": 2330,
|
|
"valid_targets_mean": 28785.7,
|
|
"valid_targets_min": 21020
|
|
},
|
|
{
|
|
"epoch": 2.4919957310565635,
|
|
"grad_norm": 0.5824136089989772,
|
|
"learning_rate": 0.0001740041537486788,
|
|
"loss": 0.4517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4471256136894226,
|
|
"step": 2335,
|
|
"valid_targets_mean": 29024.5,
|
|
"valid_targets_min": 23134
|
|
},
|
|
{
|
|
"epoch": 2.497331910352188,
|
|
"grad_norm": 0.5065556408907309,
|
|
"learning_rate": 0.00017384734837749472,
|
|
"loss": 0.4505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4542456865310669,
|
|
"step": 2340,
|
|
"valid_targets_mean": 28765.2,
|
|
"valid_targets_min": 21832
|
|
},
|
|
{
|
|
"epoch": 2.502668089647812,
|
|
"grad_norm": 0.552973006999218,
|
|
"learning_rate": 0.000173690142617631,
|
|
"loss": 0.4509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4535852074623108,
|
|
"step": 2345,
|
|
"valid_targets_mean": 28844.7,
|
|
"valid_targets_min": 19678
|
|
},
|
|
{
|
|
"epoch": 2.5080042689434365,
|
|
"grad_norm": 0.4743900747981737,
|
|
"learning_rate": 0.00017353253732143255,
|
|
"loss": 0.4518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4519196152687073,
|
|
"step": 2350,
|
|
"valid_targets_mean": 28954.9,
|
|
"valid_targets_min": 22529
|
|
},
|
|
{
|
|
"epoch": 2.513340448239061,
|
|
"grad_norm": 0.562657221120804,
|
|
"learning_rate": 0.00017337453334341044,
|
|
"loss": 0.4512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44621649384498596,
|
|
"step": 2355,
|
|
"valid_targets_mean": 28777.2,
|
|
"valid_targets_min": 22850
|
|
},
|
|
{
|
|
"epoch": 2.5186766275346852,
|
|
"grad_norm": 0.6273755858177126,
|
|
"learning_rate": 0.00017321613154023727,
|
|
"loss": 0.4513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4545447826385498,
|
|
"step": 2360,
|
|
"valid_targets_mean": 28767.7,
|
|
"valid_targets_min": 22860
|
|
},
|
|
{
|
|
"epoch": 2.5240128068303096,
|
|
"grad_norm": 0.5944845566511393,
|
|
"learning_rate": 0.00017305733277074272,
|
|
"loss": 0.4495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4476246237754822,
|
|
"step": 2365,
|
|
"valid_targets_mean": 28874.1,
|
|
"valid_targets_min": 16924
|
|
},
|
|
{
|
|
"epoch": 2.529348986125934,
|
|
"grad_norm": 0.44790911582263976,
|
|
"learning_rate": 0.00017289813789590864,
|
|
"loss": 0.4522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4487800896167755,
|
|
"step": 2370,
|
|
"valid_targets_mean": 28719.8,
|
|
"valid_targets_min": 20685
|
|
},
|
|
{
|
|
"epoch": 2.5346851654215583,
|
|
"grad_norm": 0.743814817864195,
|
|
"learning_rate": 0.0001727385477788645,
|
|
"loss": 0.4505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4476528763771057,
|
|
"step": 2375,
|
|
"valid_targets_mean": 29018.1,
|
|
"valid_targets_min": 22291
|
|
},
|
|
{
|
|
"epoch": 2.5400213447171827,
|
|
"grad_norm": 0.5011954170333152,
|
|
"learning_rate": 0.00017257856328488285,
|
|
"loss": 0.4531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45606738328933716,
|
|
"step": 2380,
|
|
"valid_targets_mean": 28651.0,
|
|
"valid_targets_min": 21448
|
|
},
|
|
{
|
|
"epoch": 2.5453575240128066,
|
|
"grad_norm": 0.48315014269896034,
|
|
"learning_rate": 0.0001724181852813743,
|
|
"loss": 0.4503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44993042945861816,
|
|
"step": 2385,
|
|
"valid_targets_mean": 28859.8,
|
|
"valid_targets_min": 21386
|
|
},
|
|
{
|
|
"epoch": 2.550693703308431,
|
|
"grad_norm": 0.49675712419607215,
|
|
"learning_rate": 0.00017225741463788315,
|
|
"loss": 0.4505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45400500297546387,
|
|
"step": 2390,
|
|
"valid_targets_mean": 28968.6,
|
|
"valid_targets_min": 17603
|
|
},
|
|
{
|
|
"epoch": 2.5560298826040553,
|
|
"grad_norm": 0.5173302426897215,
|
|
"learning_rate": 0.0001720962522260825,
|
|
"loss": 0.4521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45111632347106934,
|
|
"step": 2395,
|
|
"valid_targets_mean": 28820.2,
|
|
"valid_targets_min": 19026
|
|
},
|
|
{
|
|
"epoch": 2.5613660618996796,
|
|
"grad_norm": 0.6860815784203482,
|
|
"learning_rate": 0.00017193469891976947,
|
|
"loss": 0.4494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4513351619243622,
|
|
"step": 2400,
|
|
"valid_targets_mean": 28871.1,
|
|
"valid_targets_min": 22025
|
|
},
|
|
{
|
|
"epoch": 2.566702241195304,
|
|
"grad_norm": 0.7030866923646217,
|
|
"learning_rate": 0.00017177275559486068,
|
|
"loss": 0.4521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4503113627433777,
|
|
"step": 2405,
|
|
"valid_targets_mean": 28827.6,
|
|
"valid_targets_min": 19319
|
|
},
|
|
{
|
|
"epoch": 2.5720384204909283,
|
|
"grad_norm": 0.43516321280201453,
|
|
"learning_rate": 0.00017161042312938723,
|
|
"loss": 0.4509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4512559771537781,
|
|
"step": 2410,
|
|
"valid_targets_mean": 28753.2,
|
|
"valid_targets_min": 22762
|
|
},
|
|
{
|
|
"epoch": 2.5773745997865527,
|
|
"grad_norm": 0.5133859107024392,
|
|
"learning_rate": 0.00017144770240349022,
|
|
"loss": 0.45,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4520412087440491,
|
|
"step": 2415,
|
|
"valid_targets_mean": 28935.5,
|
|
"valid_targets_min": 21925
|
|
},
|
|
{
|
|
"epoch": 2.582710779082177,
|
|
"grad_norm": 0.5064856540613286,
|
|
"learning_rate": 0.00017128459429941576,
|
|
"loss": 0.4485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44660258293151855,
|
|
"step": 2420,
|
|
"valid_targets_mean": 28878.1,
|
|
"valid_targets_min": 18457
|
|
},
|
|
{
|
|
"epoch": 2.5880469583778014,
|
|
"grad_norm": 0.5846486099782726,
|
|
"learning_rate": 0.00017112109970151025,
|
|
"loss": 0.4516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45156729221343994,
|
|
"step": 2425,
|
|
"valid_targets_mean": 28922.7,
|
|
"valid_targets_min": 19523
|
|
},
|
|
{
|
|
"epoch": 2.5933831376734258,
|
|
"grad_norm": 0.5187420506239093,
|
|
"learning_rate": 0.00017095721949621565,
|
|
"loss": 0.4486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4488029181957245,
|
|
"step": 2430,
|
|
"valid_targets_mean": 28784.3,
|
|
"valid_targets_min": 18489
|
|
},
|
|
{
|
|
"epoch": 2.59871931696905,
|
|
"grad_norm": 0.5900113475966927,
|
|
"learning_rate": 0.00017079295457206456,
|
|
"loss": 0.4504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4478583335876465,
|
|
"step": 2435,
|
|
"valid_targets_mean": 28929.3,
|
|
"valid_targets_min": 20301
|
|
},
|
|
{
|
|
"epoch": 2.6040554962646745,
|
|
"grad_norm": 0.5132399927767499,
|
|
"learning_rate": 0.0001706283058196755,
|
|
"loss": 0.448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44799095392227173,
|
|
"step": 2440,
|
|
"valid_targets_mean": 28912.8,
|
|
"valid_targets_min": 20144
|
|
},
|
|
{
|
|
"epoch": 2.609391675560299,
|
|
"grad_norm": 0.4623662629357846,
|
|
"learning_rate": 0.0001704632741317481,
|
|
"loss": 0.4482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4481392502784729,
|
|
"step": 2445,
|
|
"valid_targets_mean": 28754.9,
|
|
"valid_targets_min": 22290
|
|
},
|
|
{
|
|
"epoch": 2.614727854855923,
|
|
"grad_norm": 0.5131007295366438,
|
|
"learning_rate": 0.00017029786040305807,
|
|
"loss": 0.4498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44938600063323975,
|
|
"step": 2450,
|
|
"valid_targets_mean": 28862.4,
|
|
"valid_targets_min": 20307
|
|
},
|
|
{
|
|
"epoch": 2.6200640341515475,
|
|
"grad_norm": 0.578946446438132,
|
|
"learning_rate": 0.00017013206553045268,
|
|
"loss": 0.4499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45322614908218384,
|
|
"step": 2455,
|
|
"valid_targets_mean": 29086.3,
|
|
"valid_targets_min": 23229
|
|
},
|
|
{
|
|
"epoch": 2.625400213447172,
|
|
"grad_norm": 0.5302147435478608,
|
|
"learning_rate": 0.00016996589041284546,
|
|
"loss": 0.4485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4469335079193115,
|
|
"step": 2460,
|
|
"valid_targets_mean": 28731.3,
|
|
"valid_targets_min": 18282
|
|
},
|
|
{
|
|
"epoch": 2.6307363927427962,
|
|
"grad_norm": 0.5319875687319359,
|
|
"learning_rate": 0.00016979933595121177,
|
|
"loss": 0.4473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44532909989356995,
|
|
"step": 2465,
|
|
"valid_targets_mean": 28779.5,
|
|
"valid_targets_min": 19670
|
|
},
|
|
{
|
|
"epoch": 2.6360725720384206,
|
|
"grad_norm": 0.5879657232332226,
|
|
"learning_rate": 0.00016963240304858362,
|
|
"loss": 0.4469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45386573672294617,
|
|
"step": 2470,
|
|
"valid_targets_mean": 28739.6,
|
|
"valid_targets_min": 19311
|
|
},
|
|
{
|
|
"epoch": 2.641408751334045,
|
|
"grad_norm": 0.5768699609842823,
|
|
"learning_rate": 0.00016946509261004495,
|
|
"loss": 0.4497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4479980766773224,
|
|
"step": 2475,
|
|
"valid_targets_mean": 28779.4,
|
|
"valid_targets_min": 20019
|
|
},
|
|
{
|
|
"epoch": 2.6467449306296693,
|
|
"grad_norm": 0.4828563085880993,
|
|
"learning_rate": 0.00016929740554272646,
|
|
"loss": 0.4473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4485243558883667,
|
|
"step": 2480,
|
|
"valid_targets_mean": 28762.2,
|
|
"valid_targets_min": 20567
|
|
},
|
|
{
|
|
"epoch": 2.6520811099252937,
|
|
"grad_norm": 0.5224380520811517,
|
|
"learning_rate": 0.0001691293427558011,
|
|
"loss": 0.4499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4490726888179779,
|
|
"step": 2485,
|
|
"valid_targets_mean": 28755.4,
|
|
"valid_targets_min": 21414
|
|
},
|
|
{
|
|
"epoch": 2.657417289220918,
|
|
"grad_norm": 0.40870295711277854,
|
|
"learning_rate": 0.00016896090516047872,
|
|
"loss": 0.4494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.451454758644104,
|
|
"step": 2490,
|
|
"valid_targets_mean": 28876.6,
|
|
"valid_targets_min": 17603
|
|
},
|
|
{
|
|
"epoch": 2.6627534685165424,
|
|
"grad_norm": 0.43593658645048067,
|
|
"learning_rate": 0.0001687920936700015,
|
|
"loss": 0.4482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4449622631072998,
|
|
"step": 2495,
|
|
"valid_targets_mean": 28777.7,
|
|
"valid_targets_min": 22670
|
|
},
|
|
{
|
|
"epoch": 2.6680896478121667,
|
|
"grad_norm": 0.4954786841422757,
|
|
"learning_rate": 0.00016862290919963862,
|
|
"loss": 0.4482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4446685016155243,
|
|
"step": 2500,
|
|
"valid_targets_mean": 28948.7,
|
|
"valid_targets_min": 23387
|
|
},
|
|
{
|
|
"epoch": 2.673425827107791,
|
|
"grad_norm": 0.6120561515411104,
|
|
"learning_rate": 0.00016845335266668166,
|
|
"loss": 0.447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4459884464740753,
|
|
"step": 2505,
|
|
"valid_targets_mean": 28897.3,
|
|
"valid_targets_min": 22086
|
|
},
|
|
{
|
|
"epoch": 2.678762006403415,
|
|
"grad_norm": 0.48711779527463955,
|
|
"learning_rate": 0.0001682834249904394,
|
|
"loss": 0.4482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4462966322898865,
|
|
"step": 2510,
|
|
"valid_targets_mean": 28739.7,
|
|
"valid_targets_min": 20030
|
|
},
|
|
{
|
|
"epoch": 2.6840981856990394,
|
|
"grad_norm": 0.5285800413474548,
|
|
"learning_rate": 0.00016811312709223293,
|
|
"loss": 0.4484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4494142532348633,
|
|
"step": 2515,
|
|
"valid_targets_mean": 28892.6,
|
|
"valid_targets_min": 22882
|
|
},
|
|
{
|
|
"epoch": 2.6894343649946637,
|
|
"grad_norm": 0.547280626907796,
|
|
"learning_rate": 0.00016794245989539054,
|
|
"loss": 0.448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44688770174980164,
|
|
"step": 2520,
|
|
"valid_targets_mean": 28904.5,
|
|
"valid_targets_min": 22965
|
|
},
|
|
{
|
|
"epoch": 2.694770544290288,
|
|
"grad_norm": 0.48172505653099523,
|
|
"learning_rate": 0.00016777142432524293,
|
|
"loss": 0.4457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44005584716796875,
|
|
"step": 2525,
|
|
"valid_targets_mean": 28885.4,
|
|
"valid_targets_min": 19314
|
|
},
|
|
{
|
|
"epoch": 2.7001067235859124,
|
|
"grad_norm": 0.6083329167340638,
|
|
"learning_rate": 0.00016760002130911797,
|
|
"loss": 0.4472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4509948492050171,
|
|
"step": 2530,
|
|
"valid_targets_mean": 28831.2,
|
|
"valid_targets_min": 18138
|
|
},
|
|
{
|
|
"epoch": 2.7054429028815368,
|
|
"grad_norm": 0.49608367234784617,
|
|
"learning_rate": 0.0001674282517763358,
|
|
"loss": 0.4464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44667625427246094,
|
|
"step": 2535,
|
|
"valid_targets_mean": 28749.2,
|
|
"valid_targets_min": 20359
|
|
},
|
|
{
|
|
"epoch": 2.710779082177161,
|
|
"grad_norm": 0.5036192923672199,
|
|
"learning_rate": 0.0001672561166582037,
|
|
"loss": 0.4464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4464219808578491,
|
|
"step": 2540,
|
|
"valid_targets_mean": 29040.9,
|
|
"valid_targets_min": 21940
|
|
},
|
|
{
|
|
"epoch": 2.7161152614727855,
|
|
"grad_norm": 0.5175847995127293,
|
|
"learning_rate": 0.0001670836168880112,
|
|
"loss": 0.4474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44696956872940063,
|
|
"step": 2545,
|
|
"valid_targets_mean": 28739.1,
|
|
"valid_targets_min": 21583
|
|
},
|
|
{
|
|
"epoch": 2.72145144076841,
|
|
"grad_norm": 0.5467711736200832,
|
|
"learning_rate": 0.00016691075340102485,
|
|
"loss": 0.4451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4472172260284424,
|
|
"step": 2550,
|
|
"valid_targets_mean": 28817.8,
|
|
"valid_targets_min": 22683
|
|
},
|
|
{
|
|
"epoch": 2.726787620064034,
|
|
"grad_norm": 0.365966177831252,
|
|
"learning_rate": 0.00016673752713448314,
|
|
"loss": 0.448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44385796785354614,
|
|
"step": 2555,
|
|
"valid_targets_mean": 28766.2,
|
|
"valid_targets_min": 20755
|
|
},
|
|
{
|
|
"epoch": 2.7321237993596585,
|
|
"grad_norm": 0.616521778989443,
|
|
"learning_rate": 0.00016656393902759166,
|
|
"loss": 0.4475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4502711296081543,
|
|
"step": 2560,
|
|
"valid_targets_mean": 28900.8,
|
|
"valid_targets_min": 21489
|
|
},
|
|
{
|
|
"epoch": 2.737459978655283,
|
|
"grad_norm": 0.5919171429734628,
|
|
"learning_rate": 0.00016638999002151775,
|
|
"loss": 0.447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4452345073223114,
|
|
"step": 2565,
|
|
"valid_targets_mean": 28944.9,
|
|
"valid_targets_min": 18314
|
|
},
|
|
{
|
|
"epoch": 2.7427961579509073,
|
|
"grad_norm": 0.49211930960693656,
|
|
"learning_rate": 0.00016621568105938548,
|
|
"loss": 0.4464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44687044620513916,
|
|
"step": 2570,
|
|
"valid_targets_mean": 28789.1,
|
|
"valid_targets_min": 21046
|
|
},
|
|
{
|
|
"epoch": 2.7481323372465316,
|
|
"grad_norm": 0.4225749650195122,
|
|
"learning_rate": 0.00016604101308627053,
|
|
"loss": 0.4465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4477379322052002,
|
|
"step": 2575,
|
|
"valid_targets_mean": 28817.5,
|
|
"valid_targets_min": 20663
|
|
},
|
|
{
|
|
"epoch": 2.753468516542156,
|
|
"grad_norm": 0.5587518323984962,
|
|
"learning_rate": 0.00016586598704919516,
|
|
"loss": 0.4476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4498264789581299,
|
|
"step": 2580,
|
|
"valid_targets_mean": 28788.0,
|
|
"valid_targets_min": 21730
|
|
},
|
|
{
|
|
"epoch": 2.75880469583778,
|
|
"grad_norm": 0.5402153975521578,
|
|
"learning_rate": 0.0001656906038971229,
|
|
"loss": 0.4462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44426554441452026,
|
|
"step": 2585,
|
|
"valid_targets_mean": 28842.4,
|
|
"valid_targets_min": 20872
|
|
},
|
|
{
|
|
"epoch": 2.7641408751334042,
|
|
"grad_norm": 0.5402476426313789,
|
|
"learning_rate": 0.0001655148645809536,
|
|
"loss": 0.4447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4427042007446289,
|
|
"step": 2590,
|
|
"valid_targets_mean": 28991.9,
|
|
"valid_targets_min": 22105
|
|
},
|
|
{
|
|
"epoch": 2.7694770544290286,
|
|
"grad_norm": 0.48279597780594186,
|
|
"learning_rate": 0.0001653387700535181,
|
|
"loss": 0.4458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4428751468658447,
|
|
"step": 2595,
|
|
"valid_targets_mean": 28902.3,
|
|
"valid_targets_min": 20062
|
|
},
|
|
{
|
|
"epoch": 2.774813233724653,
|
|
"grad_norm": 0.4834282171961409,
|
|
"learning_rate": 0.00016516232126957312,
|
|
"loss": 0.445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44715070724487305,
|
|
"step": 2600,
|
|
"valid_targets_mean": 29066.2,
|
|
"valid_targets_min": 18312
|
|
},
|
|
{
|
|
"epoch": 2.7801494130202773,
|
|
"grad_norm": 0.4401907898711959,
|
|
"learning_rate": 0.00016498551918579622,
|
|
"loss": 0.4464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44535985589027405,
|
|
"step": 2605,
|
|
"valid_targets_mean": 28839.6,
|
|
"valid_targets_min": 17353
|
|
},
|
|
{
|
|
"epoch": 2.7854855923159016,
|
|
"grad_norm": 0.5304002176279604,
|
|
"learning_rate": 0.00016480836476078038,
|
|
"loss": 0.4466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44503068923950195,
|
|
"step": 2610,
|
|
"valid_targets_mean": 28926.2,
|
|
"valid_targets_min": 22986
|
|
},
|
|
{
|
|
"epoch": 2.790821771611526,
|
|
"grad_norm": 0.41619534514956896,
|
|
"learning_rate": 0.00016463085895502893,
|
|
"loss": 0.449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4478036165237427,
|
|
"step": 2615,
|
|
"valid_targets_mean": 28903.5,
|
|
"valid_targets_min": 21244
|
|
},
|
|
{
|
|
"epoch": 2.7961579509071504,
|
|
"grad_norm": 0.5269289182637616,
|
|
"learning_rate": 0.00016445300273095048,
|
|
"loss": 0.448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44885891675949097,
|
|
"step": 2620,
|
|
"valid_targets_mean": 28840.6,
|
|
"valid_targets_min": 22507
|
|
},
|
|
{
|
|
"epoch": 2.8014941302027747,
|
|
"grad_norm": 0.599103907176324,
|
|
"learning_rate": 0.0001642747970528533,
|
|
"loss": 0.4457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4468061327934265,
|
|
"step": 2625,
|
|
"valid_targets_mean": 28698.8,
|
|
"valid_targets_min": 20942
|
|
},
|
|
{
|
|
"epoch": 2.806830309498399,
|
|
"grad_norm": 0.4800140872410769,
|
|
"learning_rate": 0.00016409624288694056,
|
|
"loss": 0.4472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4467303156852722,
|
|
"step": 2630,
|
|
"valid_targets_mean": 28816.9,
|
|
"valid_targets_min": 18328
|
|
},
|
|
{
|
|
"epoch": 2.8121664887940234,
|
|
"grad_norm": 0.5266127538960025,
|
|
"learning_rate": 0.00016391734120130478,
|
|
"loss": 0.4463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4444383382797241,
|
|
"step": 2635,
|
|
"valid_targets_mean": 28910.7,
|
|
"valid_targets_min": 18160
|
|
},
|
|
{
|
|
"epoch": 2.8175026680896478,
|
|
"grad_norm": 0.43419276028585196,
|
|
"learning_rate": 0.00016373809296592266,
|
|
"loss": 0.4447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.443906307220459,
|
|
"step": 2640,
|
|
"valid_targets_mean": 28914.4,
|
|
"valid_targets_min": 23648
|
|
},
|
|
{
|
|
"epoch": 2.822838847385272,
|
|
"grad_norm": 0.491944631368096,
|
|
"learning_rate": 0.00016355849915264989,
|
|
"loss": 0.449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44785192608833313,
|
|
"step": 2645,
|
|
"valid_targets_mean": 28810.1,
|
|
"valid_targets_min": 21924
|
|
},
|
|
{
|
|
"epoch": 2.8281750266808965,
|
|
"grad_norm": 0.5124190278407998,
|
|
"learning_rate": 0.00016337856073521585,
|
|
"loss": 0.4467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44588106870651245,
|
|
"step": 2650,
|
|
"valid_targets_mean": 28823.3,
|
|
"valid_targets_min": 21076
|
|
},
|
|
{
|
|
"epoch": 2.833511205976521,
|
|
"grad_norm": 0.46000176515786123,
|
|
"learning_rate": 0.00016319827868921822,
|
|
"loss": 0.4449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44242948293685913,
|
|
"step": 2655,
|
|
"valid_targets_mean": 28862.9,
|
|
"valid_targets_min": 17089
|
|
},
|
|
{
|
|
"epoch": 2.838847385272145,
|
|
"grad_norm": 0.5123830710533339,
|
|
"learning_rate": 0.0001630176539921178,
|
|
"loss": 0.4467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4461979866027832,
|
|
"step": 2660,
|
|
"valid_targets_mean": 28735.5,
|
|
"valid_targets_min": 22873
|
|
},
|
|
{
|
|
"epoch": 2.8441835645677696,
|
|
"grad_norm": 0.5125682124858327,
|
|
"learning_rate": 0.0001628366876232333,
|
|
"loss": 0.4439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4459686875343323,
|
|
"step": 2665,
|
|
"valid_targets_mean": 28800.9,
|
|
"valid_targets_min": 21201
|
|
},
|
|
{
|
|
"epoch": 2.849519743863394,
|
|
"grad_norm": 0.48497008568172956,
|
|
"learning_rate": 0.00016265538056373582,
|
|
"loss": 0.4465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44951844215393066,
|
|
"step": 2670,
|
|
"valid_targets_mean": 28769.1,
|
|
"valid_targets_min": 20652
|
|
},
|
|
{
|
|
"epoch": 2.8548559231590183,
|
|
"grad_norm": 0.5092773446282002,
|
|
"learning_rate": 0.00016247373379664362,
|
|
"loss": 0.4457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44676268100738525,
|
|
"step": 2675,
|
|
"valid_targets_mean": 28809.3,
|
|
"valid_targets_min": 19575
|
|
},
|
|
{
|
|
"epoch": 2.8601921024546426,
|
|
"grad_norm": 0.4780353311067859,
|
|
"learning_rate": 0.00016229174830681683,
|
|
"loss": 0.4445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44372624158859253,
|
|
"step": 2680,
|
|
"valid_targets_mean": 28971.6,
|
|
"valid_targets_min": 15604
|
|
},
|
|
{
|
|
"epoch": 2.865528281750267,
|
|
"grad_norm": 0.5115745909841504,
|
|
"learning_rate": 0.00016210942508095212,
|
|
"loss": 0.4426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44047439098358154,
|
|
"step": 2685,
|
|
"valid_targets_mean": 28909.9,
|
|
"valid_targets_min": 23261
|
|
},
|
|
{
|
|
"epoch": 2.8708644610458913,
|
|
"grad_norm": 0.5193392457381629,
|
|
"learning_rate": 0.00016192676510757718,
|
|
"loss": 0.4457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44597327709198,
|
|
"step": 2690,
|
|
"valid_targets_mean": 28944.7,
|
|
"valid_targets_min": 21576
|
|
},
|
|
{
|
|
"epoch": 2.8762006403415157,
|
|
"grad_norm": 0.5480375004719434,
|
|
"learning_rate": 0.0001617437693770457,
|
|
"loss": 0.4438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4467953145503998,
|
|
"step": 2695,
|
|
"valid_targets_mean": 28884.0,
|
|
"valid_targets_min": 22438
|
|
},
|
|
{
|
|
"epoch": 2.88153681963714,
|
|
"grad_norm": 0.44906841653427015,
|
|
"learning_rate": 0.00016156043888153164,
|
|
"loss": 0.4441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4425533413887024,
|
|
"step": 2700,
|
|
"valid_targets_mean": 28806.5,
|
|
"valid_targets_min": 19526
|
|
},
|
|
{
|
|
"epoch": 2.8868729989327644,
|
|
"grad_norm": 0.621668370814208,
|
|
"learning_rate": 0.00016137677461502403,
|
|
"loss": 0.4446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4372178316116333,
|
|
"step": 2705,
|
|
"valid_targets_mean": 28990.3,
|
|
"valid_targets_min": 23225
|
|
},
|
|
{
|
|
"epoch": 2.8922091782283887,
|
|
"grad_norm": 0.6787756827283434,
|
|
"learning_rate": 0.00016119277757332157,
|
|
"loss": 0.443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4457907974720001,
|
|
"step": 2710,
|
|
"valid_targets_mean": 28859.2,
|
|
"valid_targets_min": 20131
|
|
},
|
|
{
|
|
"epoch": 2.8975453575240127,
|
|
"grad_norm": 0.45377658740918214,
|
|
"learning_rate": 0.0001610084487540272,
|
|
"loss": 0.4449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44517552852630615,
|
|
"step": 2715,
|
|
"valid_targets_mean": 28863.3,
|
|
"valid_targets_min": 22475
|
|
},
|
|
{
|
|
"epoch": 2.902881536819637,
|
|
"grad_norm": 0.5890326523356322,
|
|
"learning_rate": 0.00016082378915654275,
|
|
"loss": 0.4464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4380131959915161,
|
|
"step": 2720,
|
|
"valid_targets_mean": 28753.4,
|
|
"valid_targets_min": 20177
|
|
},
|
|
{
|
|
"epoch": 2.9082177161152614,
|
|
"grad_norm": 0.4777061022556069,
|
|
"learning_rate": 0.0001606387997820634,
|
|
"loss": 0.4458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4443918466567993,
|
|
"step": 2725,
|
|
"valid_targets_mean": 29008.7,
|
|
"valid_targets_min": 22739
|
|
},
|
|
{
|
|
"epoch": 2.9135538954108857,
|
|
"grad_norm": 0.4309800478585167,
|
|
"learning_rate": 0.0001604534816335724,
|
|
"loss": 0.4464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4474114179611206,
|
|
"step": 2730,
|
|
"valid_targets_mean": 28760.2,
|
|
"valid_targets_min": 22612
|
|
},
|
|
{
|
|
"epoch": 2.91889007470651,
|
|
"grad_norm": 0.5449182954064392,
|
|
"learning_rate": 0.0001602678357158355,
|
|
"loss": 0.4441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44207650423049927,
|
|
"step": 2735,
|
|
"valid_targets_mean": 28849.7,
|
|
"valid_targets_min": 21517
|
|
},
|
|
{
|
|
"epoch": 2.9242262540021344,
|
|
"grad_norm": 0.4298551008009081,
|
|
"learning_rate": 0.00016008186303539566,
|
|
"loss": 0.4438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44367218017578125,
|
|
"step": 2740,
|
|
"valid_targets_mean": 28961.5,
|
|
"valid_targets_min": 23110
|
|
},
|
|
{
|
|
"epoch": 2.929562433297759,
|
|
"grad_norm": 0.4524289912205744,
|
|
"learning_rate": 0.00015989556460056735,
|
|
"loss": 0.4432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4443000257015228,
|
|
"step": 2745,
|
|
"valid_targets_mean": 28859.4,
|
|
"valid_targets_min": 20708
|
|
},
|
|
{
|
|
"epoch": 2.934898612593383,
|
|
"grad_norm": 0.44109632853564523,
|
|
"learning_rate": 0.00015970894142143137,
|
|
"loss": 0.4423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44295409321784973,
|
|
"step": 2750,
|
|
"valid_targets_mean": 28640.2,
|
|
"valid_targets_min": 18731
|
|
},
|
|
{
|
|
"epoch": 2.9402347918890075,
|
|
"grad_norm": 0.6280674197563827,
|
|
"learning_rate": 0.00015952199450982915,
|
|
"loss": 0.4408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4429868459701538,
|
|
"step": 2755,
|
|
"valid_targets_mean": 28917.0,
|
|
"valid_targets_min": 21891
|
|
},
|
|
{
|
|
"epoch": 2.945570971184632,
|
|
"grad_norm": 0.46508843136826117,
|
|
"learning_rate": 0.0001593347248793574,
|
|
"loss": 0.4448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4444461166858673,
|
|
"step": 2760,
|
|
"valid_targets_mean": 28888.2,
|
|
"valid_targets_min": 22959
|
|
},
|
|
{
|
|
"epoch": 2.950907150480256,
|
|
"grad_norm": 0.46799530110116033,
|
|
"learning_rate": 0.00015914713354536243,
|
|
"loss": 0.4448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44128793478012085,
|
|
"step": 2765,
|
|
"valid_targets_mean": 28796.6,
|
|
"valid_targets_min": 21241
|
|
},
|
|
{
|
|
"epoch": 2.9562433297758806,
|
|
"grad_norm": 0.47772804494469634,
|
|
"learning_rate": 0.000158959221524935,
|
|
"loss": 0.4438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44201916456222534,
|
|
"step": 2770,
|
|
"valid_targets_mean": 28826.8,
|
|
"valid_targets_min": 22051
|
|
},
|
|
{
|
|
"epoch": 2.961579509071505,
|
|
"grad_norm": 0.44155331091775607,
|
|
"learning_rate": 0.00015877098983690442,
|
|
"loss": 0.4432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4451548755168915,
|
|
"step": 2775,
|
|
"valid_targets_mean": 28904.8,
|
|
"valid_targets_min": 20706
|
|
},
|
|
{
|
|
"epoch": 2.9669156883671293,
|
|
"grad_norm": 0.48382287565622245,
|
|
"learning_rate": 0.00015858243950183317,
|
|
"loss": 0.4438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4427657723426819,
|
|
"step": 2780,
|
|
"valid_targets_mean": 28867.6,
|
|
"valid_targets_min": 17286
|
|
},
|
|
{
|
|
"epoch": 2.9722518676627536,
|
|
"grad_norm": 0.5766134569712373,
|
|
"learning_rate": 0.0001583935715420115,
|
|
"loss": 0.4437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4461662471294403,
|
|
"step": 2785,
|
|
"valid_targets_mean": 28908.2,
|
|
"valid_targets_min": 21011
|
|
},
|
|
{
|
|
"epoch": 2.9775880469583775,
|
|
"grad_norm": 0.4695408328680145,
|
|
"learning_rate": 0.0001582043869814517,
|
|
"loss": 0.4424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44310444593429565,
|
|
"step": 2790,
|
|
"valid_targets_mean": 28761.7,
|
|
"valid_targets_min": 21496
|
|
},
|
|
{
|
|
"epoch": 2.982924226254002,
|
|
"grad_norm": 0.5000132923426082,
|
|
"learning_rate": 0.0001580148868458826,
|
|
"loss": 0.4418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44318729639053345,
|
|
"step": 2795,
|
|
"valid_targets_mean": 28784.2,
|
|
"valid_targets_min": 21259
|
|
},
|
|
{
|
|
"epoch": 2.9882604055496262,
|
|
"grad_norm": 0.4274040097496733,
|
|
"learning_rate": 0.0001578250721627441,
|
|
"loss": 0.4434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4456891417503357,
|
|
"step": 2800,
|
|
"valid_targets_mean": 28815.1,
|
|
"valid_targets_min": 17412
|
|
},
|
|
{
|
|
"epoch": 2.9935965848452506,
|
|
"grad_norm": 0.5125046838708746,
|
|
"learning_rate": 0.00015763494396118144,
|
|
"loss": 0.4423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44653642177581787,
|
|
"step": 2805,
|
|
"valid_targets_mean": 28916.4,
|
|
"valid_targets_min": 22867
|
|
},
|
|
{
|
|
"epoch": 2.998932764140875,
|
|
"grad_norm": 0.4815289265414247,
|
|
"learning_rate": 0.00015744450327203976,
|
|
"loss": 0.4427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4451421797275543,
|
|
"step": 2810,
|
|
"valid_targets_mean": 28983.4,
|
|
"valid_targets_min": 19654
|
|
},
|
|
{
|
|
"epoch": 3.0042689434364993,
|
|
"grad_norm": 0.4490098209846971,
|
|
"learning_rate": 0.00015725375112785845,
|
|
"loss": 0.4358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4360910654067993,
|
|
"step": 2815,
|
|
"valid_targets_mean": 28692.1,
|
|
"valid_targets_min": 21197
|
|
},
|
|
{
|
|
"epoch": 3.0096051227321237,
|
|
"grad_norm": 0.4846583619603035,
|
|
"learning_rate": 0.0001570626885628656,
|
|
"loss": 0.4375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4442782402038574,
|
|
"step": 2820,
|
|
"valid_targets_mean": 28761.2,
|
|
"valid_targets_min": 21455
|
|
},
|
|
{
|
|
"epoch": 3.014941302027748,
|
|
"grad_norm": 0.5843775640172333,
|
|
"learning_rate": 0.00015687131661297227,
|
|
"loss": 0.439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44055095314979553,
|
|
"step": 2825,
|
|
"valid_targets_mean": 28790.7,
|
|
"valid_targets_min": 22615
|
|
},
|
|
{
|
|
"epoch": 3.0202774813233724,
|
|
"grad_norm": 0.42977687830721756,
|
|
"learning_rate": 0.00015667963631576704,
|
|
"loss": 0.4377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44009584188461304,
|
|
"step": 2830,
|
|
"valid_targets_mean": 28764.4,
|
|
"valid_targets_min": 20275
|
|
},
|
|
{
|
|
"epoch": 3.0256136606189967,
|
|
"grad_norm": 0.5515477501528725,
|
|
"learning_rate": 0.00015648764871051022,
|
|
"loss": 0.4372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44307464361190796,
|
|
"step": 2835,
|
|
"valid_targets_mean": 28895.8,
|
|
"valid_targets_min": 22290
|
|
},
|
|
{
|
|
"epoch": 3.030949839914621,
|
|
"grad_norm": 0.5626533008663859,
|
|
"learning_rate": 0.00015629535483812834,
|
|
"loss": 0.4374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44284501671791077,
|
|
"step": 2840,
|
|
"valid_targets_mean": 29000.4,
|
|
"valid_targets_min": 22498
|
|
},
|
|
{
|
|
"epoch": 3.0362860192102454,
|
|
"grad_norm": 0.5902662848270563,
|
|
"learning_rate": 0.00015610275574120847,
|
|
"loss": 0.437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4383416771888733,
|
|
"step": 2845,
|
|
"valid_targets_mean": 28894.3,
|
|
"valid_targets_min": 20728
|
|
},
|
|
{
|
|
"epoch": 3.04162219850587,
|
|
"grad_norm": 0.4776689992581966,
|
|
"learning_rate": 0.00015590985246399254,
|
|
"loss": 0.4385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4417439103126526,
|
|
"step": 2850,
|
|
"valid_targets_mean": 28637.8,
|
|
"valid_targets_min": 22253
|
|
},
|
|
{
|
|
"epoch": 3.046958377801494,
|
|
"grad_norm": 0.5814793838248311,
|
|
"learning_rate": 0.0001557166460523717,
|
|
"loss": 0.4392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4406072497367859,
|
|
"step": 2855,
|
|
"valid_targets_mean": 28745.9,
|
|
"valid_targets_min": 21289
|
|
},
|
|
{
|
|
"epoch": 3.0522945570971185,
|
|
"grad_norm": 0.43664177754927597,
|
|
"learning_rate": 0.00015552313755388072,
|
|
"loss": 0.4407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43565136194229126,
|
|
"step": 2860,
|
|
"valid_targets_mean": 28927.1,
|
|
"valid_targets_min": 23185
|
|
},
|
|
{
|
|
"epoch": 3.057630736392743,
|
|
"grad_norm": 0.4565338949278503,
|
|
"learning_rate": 0.00015532932801769203,
|
|
"loss": 0.4384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44098299741744995,
|
|
"step": 2865,
|
|
"valid_targets_mean": 28965.3,
|
|
"valid_targets_min": 19321
|
|
},
|
|
{
|
|
"epoch": 3.062966915688367,
|
|
"grad_norm": 0.5857395045348511,
|
|
"learning_rate": 0.0001551352184946105,
|
|
"loss": 0.4374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43504267930984497,
|
|
"step": 2870,
|
|
"valid_targets_mean": 28885.3,
|
|
"valid_targets_min": 22399
|
|
},
|
|
{
|
|
"epoch": 3.0683030949839916,
|
|
"grad_norm": 0.44489437983437763,
|
|
"learning_rate": 0.00015494081003706732,
|
|
"loss": 0.439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4336409568786621,
|
|
"step": 2875,
|
|
"valid_targets_mean": 28808.5,
|
|
"valid_targets_min": 19901
|
|
},
|
|
{
|
|
"epoch": 3.073639274279616,
|
|
"grad_norm": 0.5079821686131969,
|
|
"learning_rate": 0.00015474610369911447,
|
|
"loss": 0.4359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43570592999458313,
|
|
"step": 2880,
|
|
"valid_targets_mean": 28721.2,
|
|
"valid_targets_min": 20846
|
|
},
|
|
{
|
|
"epoch": 3.0789754535752403,
|
|
"grad_norm": 0.5466811796972372,
|
|
"learning_rate": 0.00015455110053641904,
|
|
"loss": 0.4365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43693655729293823,
|
|
"step": 2885,
|
|
"valid_targets_mean": 28707.8,
|
|
"valid_targets_min": 22286
|
|
},
|
|
{
|
|
"epoch": 3.0843116328708646,
|
|
"grad_norm": 0.4747809130741953,
|
|
"learning_rate": 0.00015435580160625746,
|
|
"loss": 0.4361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4353131651878357,
|
|
"step": 2890,
|
|
"valid_targets_mean": 28815.9,
|
|
"valid_targets_min": 19218
|
|
},
|
|
{
|
|
"epoch": 3.089647812166489,
|
|
"grad_norm": 0.5577315428315981,
|
|
"learning_rate": 0.00015416020796750964,
|
|
"loss": 0.4375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4367016553878784,
|
|
"step": 2895,
|
|
"valid_targets_mean": 28818.8,
|
|
"valid_targets_min": 18724
|
|
},
|
|
{
|
|
"epoch": 3.0949839914621133,
|
|
"grad_norm": 0.4377505907429175,
|
|
"learning_rate": 0.00015396432068065356,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43655478954315186,
|
|
"step": 2900,
|
|
"valid_targets_mean": 28768.1,
|
|
"valid_targets_min": 17581
|
|
},
|
|
{
|
|
"epoch": 3.1003201707577372,
|
|
"grad_norm": 0.5422717618043638,
|
|
"learning_rate": 0.00015376814080775915,
|
|
"loss": 0.4391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43886345624923706,
|
|
"step": 2905,
|
|
"valid_targets_mean": 28785.8,
|
|
"valid_targets_min": 20545
|
|
},
|
|
{
|
|
"epoch": 3.1056563500533616,
|
|
"grad_norm": 0.4737668348911352,
|
|
"learning_rate": 0.00015357166941248277,
|
|
"loss": 0.4376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43835288286209106,
|
|
"step": 2910,
|
|
"valid_targets_mean": 28717.6,
|
|
"valid_targets_min": 22895
|
|
},
|
|
{
|
|
"epoch": 3.110992529348986,
|
|
"grad_norm": 0.543871526659854,
|
|
"learning_rate": 0.00015337490756006132,
|
|
"loss": 0.4381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4373202919960022,
|
|
"step": 2915,
|
|
"valid_targets_mean": 28874.1,
|
|
"valid_targets_min": 21424
|
|
},
|
|
{
|
|
"epoch": 3.1163287086446103,
|
|
"grad_norm": 0.45527342072667687,
|
|
"learning_rate": 0.00015317785631730661,
|
|
"loss": 0.4389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.440227746963501,
|
|
"step": 2920,
|
|
"valid_targets_mean": 28739.2,
|
|
"valid_targets_min": 21858
|
|
},
|
|
{
|
|
"epoch": 3.1216648879402347,
|
|
"grad_norm": 0.49916830905132054,
|
|
"learning_rate": 0.00015298051675259933,
|
|
"loss": 0.4376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43711093068122864,
|
|
"step": 2925,
|
|
"valid_targets_mean": 28903.5,
|
|
"valid_targets_min": 19999
|
|
},
|
|
{
|
|
"epoch": 3.127001067235859,
|
|
"grad_norm": 0.427372846116701,
|
|
"learning_rate": 0.00015278288993588346,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4339505434036255,
|
|
"step": 2930,
|
|
"valid_targets_mean": 28693.9,
|
|
"valid_targets_min": 17885
|
|
},
|
|
{
|
|
"epoch": 3.1323372465314834,
|
|
"grad_norm": 0.35038643196964536,
|
|
"learning_rate": 0.00015258497693866057,
|
|
"loss": 0.4351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43530455231666565,
|
|
"step": 2935,
|
|
"valid_targets_mean": 28755.8,
|
|
"valid_targets_min": 22373
|
|
},
|
|
{
|
|
"epoch": 3.1376734258271077,
|
|
"grad_norm": 0.4440862785337304,
|
|
"learning_rate": 0.00015238677883398362,
|
|
"loss": 0.4376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43910306692123413,
|
|
"step": 2940,
|
|
"valid_targets_mean": 28886.5,
|
|
"valid_targets_min": 21548
|
|
},
|
|
{
|
|
"epoch": 3.143009605122732,
|
|
"grad_norm": 0.5621426459146843,
|
|
"learning_rate": 0.0001521882966964515,
|
|
"loss": 0.4392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43817323446273804,
|
|
"step": 2945,
|
|
"valid_targets_mean": 28765.8,
|
|
"valid_targets_min": 16515
|
|
},
|
|
{
|
|
"epoch": 3.1483457844183564,
|
|
"grad_norm": 0.5053752189040438,
|
|
"learning_rate": 0.00015198953160220312,
|
|
"loss": 0.4354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4326125383377075,
|
|
"step": 2950,
|
|
"valid_targets_mean": 28974.8,
|
|
"valid_targets_min": 17895
|
|
},
|
|
{
|
|
"epoch": 3.153681963713981,
|
|
"grad_norm": 0.47984183388372986,
|
|
"learning_rate": 0.00015179048462891145,
|
|
"loss": 0.4385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4363112449645996,
|
|
"step": 2955,
|
|
"valid_targets_mean": 29044.7,
|
|
"valid_targets_min": 23385
|
|
},
|
|
{
|
|
"epoch": 3.159018143009605,
|
|
"grad_norm": 0.5107486500128496,
|
|
"learning_rate": 0.00015159115685577774,
|
|
"loss": 0.4353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43246275186538696,
|
|
"step": 2960,
|
|
"valid_targets_mean": 28840.2,
|
|
"valid_targets_min": 22072
|
|
},
|
|
{
|
|
"epoch": 3.1643543223052295,
|
|
"grad_norm": 0.4836933274092199,
|
|
"learning_rate": 0.00015139154936352585,
|
|
"loss": 0.4346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43785497546195984,
|
|
"step": 2965,
|
|
"valid_targets_mean": 28919.9,
|
|
"valid_targets_min": 22145
|
|
},
|
|
{
|
|
"epoch": 3.169690501600854,
|
|
"grad_norm": 0.5543478553177675,
|
|
"learning_rate": 0.00015119166323439604,
|
|
"loss": 0.4351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4362708330154419,
|
|
"step": 2970,
|
|
"valid_targets_mean": 28837.1,
|
|
"valid_targets_min": 21022
|
|
},
|
|
{
|
|
"epoch": 3.175026680896478,
|
|
"grad_norm": 0.5400264191824763,
|
|
"learning_rate": 0.00015099149955213943,
|
|
"loss": 0.438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4363650977611542,
|
|
"step": 2975,
|
|
"valid_targets_mean": 28916.7,
|
|
"valid_targets_min": 23047
|
|
},
|
|
{
|
|
"epoch": 3.1803628601921026,
|
|
"grad_norm": 0.568218023832969,
|
|
"learning_rate": 0.00015079105940201198,
|
|
"loss": 0.4362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4391043782234192,
|
|
"step": 2980,
|
|
"valid_targets_mean": 29012.3,
|
|
"valid_targets_min": 21232
|
|
},
|
|
{
|
|
"epoch": 3.185699039487727,
|
|
"grad_norm": 0.531139321909535,
|
|
"learning_rate": 0.00015059034387076854,
|
|
"loss": 0.4393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4397522211074829,
|
|
"step": 2985,
|
|
"valid_targets_mean": 28765.6,
|
|
"valid_targets_min": 22435
|
|
},
|
|
{
|
|
"epoch": 3.1910352187833513,
|
|
"grad_norm": 0.4609349250065392,
|
|
"learning_rate": 0.00015038935404665705,
|
|
"loss": 0.4357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4346761703491211,
|
|
"step": 2990,
|
|
"valid_targets_mean": 28826.0,
|
|
"valid_targets_min": 21307
|
|
},
|
|
{
|
|
"epoch": 3.1963713980789756,
|
|
"grad_norm": 0.4000357269001568,
|
|
"learning_rate": 0.00015018809101941268,
|
|
"loss": 0.4376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43649059534072876,
|
|
"step": 2995,
|
|
"valid_targets_mean": 28806.0,
|
|
"valid_targets_min": 21706
|
|
},
|
|
{
|
|
"epoch": 3.2017075773746,
|
|
"grad_norm": 0.37697317071252967,
|
|
"learning_rate": 0.0001499865558802518,
|
|
"loss": 0.4366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43681079149246216,
|
|
"step": 3000,
|
|
"valid_targets_mean": 28887.8,
|
|
"valid_targets_min": 21593
|
|
},
|
|
{
|
|
"epoch": 3.207043756670224,
|
|
"grad_norm": 0.46675713799883556,
|
|
"learning_rate": 0.00014978474972186621,
|
|
"loss": 0.436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4359115958213806,
|
|
"step": 3005,
|
|
"valid_targets_mean": 28906.7,
|
|
"valid_targets_min": 21243
|
|
},
|
|
{
|
|
"epoch": 3.2123799359658483,
|
|
"grad_norm": 0.4794146972351641,
|
|
"learning_rate": 0.00014958267363841707,
|
|
"loss": 0.4357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43594199419021606,
|
|
"step": 3010,
|
|
"valid_targets_mean": 28788.4,
|
|
"valid_targets_min": 21892
|
|
},
|
|
{
|
|
"epoch": 3.2177161152614726,
|
|
"grad_norm": 0.511084600739757,
|
|
"learning_rate": 0.00014938032872552904,
|
|
"loss": 0.4409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4422481954097748,
|
|
"step": 3015,
|
|
"valid_targets_mean": 28903.2,
|
|
"valid_targets_min": 22725
|
|
},
|
|
{
|
|
"epoch": 3.223052294557097,
|
|
"grad_norm": 0.5081539910592223,
|
|
"learning_rate": 0.0001491777160802843,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4305049777030945,
|
|
"step": 3020,
|
|
"valid_targets_mean": 28737.1,
|
|
"valid_targets_min": 17466
|
|
},
|
|
{
|
|
"epoch": 3.2283884738527213,
|
|
"grad_norm": 0.4828869142114176,
|
|
"learning_rate": 0.00014897483680121669,
|
|
"loss": 0.435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43581363558769226,
|
|
"step": 3025,
|
|
"valid_targets_mean": 28965.7,
|
|
"valid_targets_min": 20422
|
|
},
|
|
{
|
|
"epoch": 3.2337246531483457,
|
|
"grad_norm": 0.4578951471605822,
|
|
"learning_rate": 0.0001487716919883057,
|
|
"loss": 0.4368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43529069423675537,
|
|
"step": 3030,
|
|
"valid_targets_mean": 28887.0,
|
|
"valid_targets_min": 21186
|
|
},
|
|
{
|
|
"epoch": 3.23906083244397,
|
|
"grad_norm": 0.5472268575085925,
|
|
"learning_rate": 0.00014856828274297044,
|
|
"loss": 0.4377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4369294345378876,
|
|
"step": 3035,
|
|
"valid_targets_mean": 28764.9,
|
|
"valid_targets_min": 18549
|
|
},
|
|
{
|
|
"epoch": 3.2443970117395944,
|
|
"grad_norm": 0.48058959360049996,
|
|
"learning_rate": 0.00014836461016806378,
|
|
"loss": 0.4378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43576347827911377,
|
|
"step": 3040,
|
|
"valid_targets_mean": 28918.6,
|
|
"valid_targets_min": 22352
|
|
},
|
|
{
|
|
"epoch": 3.2497331910352187,
|
|
"grad_norm": 0.47631716841746413,
|
|
"learning_rate": 0.0001481606753678663,
|
|
"loss": 0.436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43823981285095215,
|
|
"step": 3045,
|
|
"valid_targets_mean": 28839.2,
|
|
"valid_targets_min": 21158
|
|
},
|
|
{
|
|
"epoch": 3.255069370330843,
|
|
"grad_norm": 0.4644880270827397,
|
|
"learning_rate": 0.00014795647944808036,
|
|
"loss": 0.4364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43759581446647644,
|
|
"step": 3050,
|
|
"valid_targets_mean": 28845.9,
|
|
"valid_targets_min": 22918
|
|
},
|
|
{
|
|
"epoch": 3.2604055496264674,
|
|
"grad_norm": 0.5072907213602093,
|
|
"learning_rate": 0.000147752023515824,
|
|
"loss": 0.4351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4352570176124573,
|
|
"step": 3055,
|
|
"valid_targets_mean": 28938.5,
|
|
"valid_targets_min": 20322
|
|
},
|
|
{
|
|
"epoch": 3.265741728922092,
|
|
"grad_norm": 0.507176783487205,
|
|
"learning_rate": 0.00014754730867962506,
|
|
"loss": 0.4365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43387681245803833,
|
|
"step": 3060,
|
|
"valid_targets_mean": 28740.9,
|
|
"valid_targets_min": 21035
|
|
},
|
|
{
|
|
"epoch": 3.271077908217716,
|
|
"grad_norm": 0.48370474732246943,
|
|
"learning_rate": 0.00014734233604941517,
|
|
"loss": 0.4363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43365657329559326,
|
|
"step": 3065,
|
|
"valid_targets_mean": 28687.2,
|
|
"valid_targets_min": 20895
|
|
},
|
|
{
|
|
"epoch": 3.2764140875133405,
|
|
"grad_norm": 0.46332555017627197,
|
|
"learning_rate": 0.00014713710673652355,
|
|
"loss": 0.4328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4362318217754364,
|
|
"step": 3070,
|
|
"valid_targets_mean": 28952.4,
|
|
"valid_targets_min": 22099
|
|
},
|
|
{
|
|
"epoch": 3.281750266808965,
|
|
"grad_norm": 0.48378410659769616,
|
|
"learning_rate": 0.00014693162185367114,
|
|
"loss": 0.4368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43846186995506287,
|
|
"step": 3075,
|
|
"valid_targets_mean": 28972.7,
|
|
"valid_targets_min": 18619
|
|
},
|
|
{
|
|
"epoch": 3.287086446104589,
|
|
"grad_norm": 0.561520367354519,
|
|
"learning_rate": 0.00014672588251496467,
|
|
"loss": 0.4355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43675142526626587,
|
|
"step": 3080,
|
|
"valid_targets_mean": 28849.4,
|
|
"valid_targets_min": 17793
|
|
},
|
|
{
|
|
"epoch": 3.2924226254002136,
|
|
"grad_norm": 0.44156610019257575,
|
|
"learning_rate": 0.00014651988983589034,
|
|
"loss": 0.433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4352341294288635,
|
|
"step": 3085,
|
|
"valid_targets_mean": 28802.1,
|
|
"valid_targets_min": 21724
|
|
},
|
|
{
|
|
"epoch": 3.297758804695838,
|
|
"grad_norm": 0.4197577896293227,
|
|
"learning_rate": 0.00014631364493330798,
|
|
"loss": 0.4348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43272507190704346,
|
|
"step": 3090,
|
|
"valid_targets_mean": 28770.6,
|
|
"valid_targets_min": 21001
|
|
},
|
|
{
|
|
"epoch": 3.3030949839914623,
|
|
"grad_norm": 0.4886419166778624,
|
|
"learning_rate": 0.00014610714892544492,
|
|
"loss": 0.4348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4325237572193146,
|
|
"step": 3095,
|
|
"valid_targets_mean": 28874.4,
|
|
"valid_targets_min": 19509
|
|
},
|
|
{
|
|
"epoch": 3.3084311632870866,
|
|
"grad_norm": 0.459899265230845,
|
|
"learning_rate": 0.00014590040293189,
|
|
"loss": 0.4364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4408683180809021,
|
|
"step": 3100,
|
|
"valid_targets_mean": 28998.2,
|
|
"valid_targets_min": 22881
|
|
},
|
|
{
|
|
"epoch": 3.313767342582711,
|
|
"grad_norm": 0.5373706447277511,
|
|
"learning_rate": 0.0001456934080735874,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43423405289649963,
|
|
"step": 3105,
|
|
"valid_targets_mean": 28955.8,
|
|
"valid_targets_min": 22814
|
|
},
|
|
{
|
|
"epoch": 3.3191035218783353,
|
|
"grad_norm": 0.4593122288739941,
|
|
"learning_rate": 0.00014548616547283057,
|
|
"loss": 0.436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4395608603954315,
|
|
"step": 3110,
|
|
"valid_targets_mean": 28559.3,
|
|
"valid_targets_min": 20401
|
|
},
|
|
{
|
|
"epoch": 3.3244397011739593,
|
|
"grad_norm": 0.5534304935891788,
|
|
"learning_rate": 0.0001452786762532562,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43570777773857117,
|
|
"step": 3115,
|
|
"valid_targets_mean": 28819.2,
|
|
"valid_targets_min": 17157
|
|
},
|
|
{
|
|
"epoch": 3.3297758804695836,
|
|
"grad_norm": 0.5280281388839083,
|
|
"learning_rate": 0.00014507094153983817,
|
|
"loss": 0.4352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4362625777721405,
|
|
"step": 3120,
|
|
"valid_targets_mean": 28904.0,
|
|
"valid_targets_min": 22555
|
|
},
|
|
{
|
|
"epoch": 3.335112059765208,
|
|
"grad_norm": 0.41320446076613393,
|
|
"learning_rate": 0.00014486296245888133,
|
|
"loss": 0.434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43435296416282654,
|
|
"step": 3125,
|
|
"valid_targets_mean": 28724.5,
|
|
"valid_targets_min": 19569
|
|
},
|
|
{
|
|
"epoch": 3.3404482390608323,
|
|
"grad_norm": 0.48360144671687794,
|
|
"learning_rate": 0.00014465474013801547,
|
|
"loss": 0.4329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4317402243614197,
|
|
"step": 3130,
|
|
"valid_targets_mean": 28933.7,
|
|
"valid_targets_min": 19488
|
|
},
|
|
{
|
|
"epoch": 3.3457844183564567,
|
|
"grad_norm": 0.5029351455588194,
|
|
"learning_rate": 0.00014444627570618917,
|
|
"loss": 0.4341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4321380853652954,
|
|
"step": 3135,
|
|
"valid_targets_mean": 28885.6,
|
|
"valid_targets_min": 22972
|
|
},
|
|
{
|
|
"epoch": 3.351120597652081,
|
|
"grad_norm": 0.4618742374392624,
|
|
"learning_rate": 0.00014423757029366371,
|
|
"loss": 0.4338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4360083341598511,
|
|
"step": 3140,
|
|
"valid_targets_mean": 28724.2,
|
|
"valid_targets_min": 22321
|
|
},
|
|
{
|
|
"epoch": 3.3564567769477054,
|
|
"grad_norm": 0.500587834014976,
|
|
"learning_rate": 0.00014402862503200693,
|
|
"loss": 0.4331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4348517954349518,
|
|
"step": 3145,
|
|
"valid_targets_mean": 28856.2,
|
|
"valid_targets_min": 20609
|
|
},
|
|
{
|
|
"epoch": 3.3617929562433297,
|
|
"grad_norm": 0.46784044054670815,
|
|
"learning_rate": 0.00014381944105408713,
|
|
"loss": 0.4331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4310871660709381,
|
|
"step": 3150,
|
|
"valid_targets_mean": 28724.0,
|
|
"valid_targets_min": 20282
|
|
},
|
|
{
|
|
"epoch": 3.367129135538954,
|
|
"grad_norm": 0.4805330497133465,
|
|
"learning_rate": 0.00014361001949406683,
|
|
"loss": 0.4345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4370986819267273,
|
|
"step": 3155,
|
|
"valid_targets_mean": 28870.4,
|
|
"valid_targets_min": 22529
|
|
},
|
|
{
|
|
"epoch": 3.3724653148345785,
|
|
"grad_norm": 0.557962978167396,
|
|
"learning_rate": 0.00014340036148739667,
|
|
"loss": 0.4366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.433628112077713,
|
|
"step": 3160,
|
|
"valid_targets_mean": 28784.4,
|
|
"valid_targets_min": 13864
|
|
},
|
|
{
|
|
"epoch": 3.377801494130203,
|
|
"grad_norm": 0.47468145347567137,
|
|
"learning_rate": 0.0001431904681708094,
|
|
"loss": 0.4326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43612152338027954,
|
|
"step": 3165,
|
|
"valid_targets_mean": 28685.5,
|
|
"valid_targets_min": 22206
|
|
},
|
|
{
|
|
"epoch": 3.383137673425827,
|
|
"grad_norm": 0.47367311151064023,
|
|
"learning_rate": 0.0001429803406823134,
|
|
"loss": 0.4361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4309577941894531,
|
|
"step": 3170,
|
|
"valid_targets_mean": 28744.8,
|
|
"valid_targets_min": 21212
|
|
},
|
|
{
|
|
"epoch": 3.3884738527214515,
|
|
"grad_norm": 0.4293787714289241,
|
|
"learning_rate": 0.00014276998016118683,
|
|
"loss": 0.435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43373751640319824,
|
|
"step": 3175,
|
|
"valid_targets_mean": 28691.5,
|
|
"valid_targets_min": 17108
|
|
},
|
|
{
|
|
"epoch": 3.393810032017076,
|
|
"grad_norm": 0.46229360735788555,
|
|
"learning_rate": 0.0001425593877479712,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43639492988586426,
|
|
"step": 3180,
|
|
"valid_targets_mean": 28808.1,
|
|
"valid_targets_min": 21564
|
|
},
|
|
{
|
|
"epoch": 3.3991462113127002,
|
|
"grad_norm": 0.502035175620512,
|
|
"learning_rate": 0.0001423485645844655,
|
|
"loss": 0.434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4358476996421814,
|
|
"step": 3185,
|
|
"valid_targets_mean": 28687.7,
|
|
"valid_targets_min": 18846
|
|
},
|
|
{
|
|
"epoch": 3.4044823906083246,
|
|
"grad_norm": 0.40960475991145945,
|
|
"learning_rate": 0.0001421375118137196,
|
|
"loss": 0.4327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4317413568496704,
|
|
"step": 3190,
|
|
"valid_targets_mean": 28913.0,
|
|
"valid_targets_min": 23478
|
|
},
|
|
{
|
|
"epoch": 3.409818569903949,
|
|
"grad_norm": 0.4349549275319588,
|
|
"learning_rate": 0.00014192623058002833,
|
|
"loss": 0.4338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4319398105144501,
|
|
"step": 3195,
|
|
"valid_targets_mean": 28981.6,
|
|
"valid_targets_min": 22411
|
|
},
|
|
{
|
|
"epoch": 3.4151547491995733,
|
|
"grad_norm": 0.4854158214544209,
|
|
"learning_rate": 0.00014171472202892526,
|
|
"loss": 0.4329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4335978627204895,
|
|
"step": 3200,
|
|
"valid_targets_mean": 28860.3,
|
|
"valid_targets_min": 21640
|
|
},
|
|
{
|
|
"epoch": 3.420490928495197,
|
|
"grad_norm": 0.44519855071377373,
|
|
"learning_rate": 0.00014150298730717642,
|
|
"loss": 0.4352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43621671199798584,
|
|
"step": 3205,
|
|
"valid_targets_mean": 29066.8,
|
|
"valid_targets_min": 21390
|
|
},
|
|
{
|
|
"epoch": 3.4258271077908216,
|
|
"grad_norm": 0.507964411024077,
|
|
"learning_rate": 0.00014129102756277404,
|
|
"loss": 0.4344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4369572699069977,
|
|
"step": 3210,
|
|
"valid_targets_mean": 28842.6,
|
|
"valid_targets_min": 20748
|
|
},
|
|
{
|
|
"epoch": 3.431163287086446,
|
|
"grad_norm": 0.45705877101910725,
|
|
"learning_rate": 0.00014107884394493045,
|
|
"loss": 0.433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43259197473526,
|
|
"step": 3215,
|
|
"valid_targets_mean": 28897.4,
|
|
"valid_targets_min": 22273
|
|
},
|
|
{
|
|
"epoch": 3.4364994663820703,
|
|
"grad_norm": 0.5951471488936144,
|
|
"learning_rate": 0.00014086643760407184,
|
|
"loss": 0.4359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4341096580028534,
|
|
"step": 3220,
|
|
"valid_targets_mean": 28996.3,
|
|
"valid_targets_min": 18485
|
|
},
|
|
{
|
|
"epoch": 3.4418356456776946,
|
|
"grad_norm": 0.44496514528606296,
|
|
"learning_rate": 0.00014065380969183178,
|
|
"loss": 0.4337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4341355860233307,
|
|
"step": 3225,
|
|
"valid_targets_mean": 28753.5,
|
|
"valid_targets_min": 18329
|
|
},
|
|
{
|
|
"epoch": 3.447171824973319,
|
|
"grad_norm": 0.4751836858048521,
|
|
"learning_rate": 0.00014044096136104538,
|
|
"loss": 0.4328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43249812722206116,
|
|
"step": 3230,
|
|
"valid_targets_mean": 29014.2,
|
|
"valid_targets_min": 21394
|
|
},
|
|
{
|
|
"epoch": 3.4525080042689433,
|
|
"grad_norm": 0.4418432182242359,
|
|
"learning_rate": 0.0001402278937657427,
|
|
"loss": 0.4323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43450066447257996,
|
|
"step": 3235,
|
|
"valid_targets_mean": 28716.2,
|
|
"valid_targets_min": 20397
|
|
},
|
|
{
|
|
"epoch": 3.4578441835645677,
|
|
"grad_norm": 0.436845201857216,
|
|
"learning_rate": 0.0001400146080611426,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43189460039138794,
|
|
"step": 3240,
|
|
"valid_targets_mean": 28895.9,
|
|
"valid_targets_min": 20984
|
|
},
|
|
{
|
|
"epoch": 3.463180362860192,
|
|
"grad_norm": 0.49129311118631425,
|
|
"learning_rate": 0.00013980110540364664,
|
|
"loss": 0.4326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43245309591293335,
|
|
"step": 3245,
|
|
"valid_targets_mean": 28779.6,
|
|
"valid_targets_min": 18347
|
|
},
|
|
{
|
|
"epoch": 3.4685165421558164,
|
|
"grad_norm": 0.41406368494446794,
|
|
"learning_rate": 0.00013958738695083253,
|
|
"loss": 0.433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42854857444763184,
|
|
"step": 3250,
|
|
"valid_targets_mean": 29027.9,
|
|
"valid_targets_min": 19358
|
|
},
|
|
{
|
|
"epoch": 3.4738527214514408,
|
|
"grad_norm": 0.3914897379980043,
|
|
"learning_rate": 0.00013937345386144797,
|
|
"loss": 0.4322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4313948154449463,
|
|
"step": 3255,
|
|
"valid_targets_mean": 28844.7,
|
|
"valid_targets_min": 19376
|
|
},
|
|
{
|
|
"epoch": 3.479188900747065,
|
|
"grad_norm": 0.44171395273171776,
|
|
"learning_rate": 0.00013915930729540448,
|
|
"loss": 0.4336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43444424867630005,
|
|
"step": 3260,
|
|
"valid_targets_mean": 28860.9,
|
|
"valid_targets_min": 22790
|
|
},
|
|
{
|
|
"epoch": 3.4845250800426895,
|
|
"grad_norm": 0.44474343829313423,
|
|
"learning_rate": 0.00013894494841377102,
|
|
"loss": 0.4331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4332994520664215,
|
|
"step": 3265,
|
|
"valid_targets_mean": 28780.2,
|
|
"valid_targets_min": 18196
|
|
},
|
|
{
|
|
"epoch": 3.489861259338314,
|
|
"grad_norm": 0.49347413137837826,
|
|
"learning_rate": 0.00013873037837876758,
|
|
"loss": 0.4317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43504035472869873,
|
|
"step": 3270,
|
|
"valid_targets_mean": 28805.4,
|
|
"valid_targets_min": 19505
|
|
},
|
|
{
|
|
"epoch": 3.495197438633938,
|
|
"grad_norm": 0.43703657771662985,
|
|
"learning_rate": 0.0001385155983537591,
|
|
"loss": 0.4353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4368758797645569,
|
|
"step": 3275,
|
|
"valid_targets_mean": 28873.7,
|
|
"valid_targets_min": 22760
|
|
},
|
|
{
|
|
"epoch": 3.5005336179295625,
|
|
"grad_norm": 0.49549736446056913,
|
|
"learning_rate": 0.00013830060950324898,
|
|
"loss": 0.4326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42575693130493164,
|
|
"step": 3280,
|
|
"valid_targets_mean": 28792.0,
|
|
"valid_targets_min": 22707
|
|
},
|
|
{
|
|
"epoch": 3.505869797225187,
|
|
"grad_norm": 0.4582256590179247,
|
|
"learning_rate": 0.0001380854129928729,
|
|
"loss": 0.4331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4306070804595947,
|
|
"step": 3285,
|
|
"valid_targets_mean": 28942.3,
|
|
"valid_targets_min": 21332
|
|
},
|
|
{
|
|
"epoch": 3.5112059765208112,
|
|
"grad_norm": 0.40229320048606765,
|
|
"learning_rate": 0.0001378700099893924,
|
|
"loss": 0.4346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4349821209907532,
|
|
"step": 3290,
|
|
"valid_targets_mean": 28724.8,
|
|
"valid_targets_min": 19366
|
|
},
|
|
{
|
|
"epoch": 3.5165421558164356,
|
|
"grad_norm": 0.42753744321130677,
|
|
"learning_rate": 0.0001376544016606886,
|
|
"loss": 0.431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4316447973251343,
|
|
"step": 3295,
|
|
"valid_targets_mean": 28727.2,
|
|
"valid_targets_min": 19929
|
|
},
|
|
{
|
|
"epoch": 3.52187833511206,
|
|
"grad_norm": 0.41143315982773954,
|
|
"learning_rate": 0.00013743858917575588,
|
|
"loss": 0.4323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4331210255622864,
|
|
"step": 3300,
|
|
"valid_targets_mean": 28951.2,
|
|
"valid_targets_min": 23322
|
|
},
|
|
{
|
|
"epoch": 3.5272145144076843,
|
|
"grad_norm": 0.3661504334505945,
|
|
"learning_rate": 0.00013722257370469545,
|
|
"loss": 0.4316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43106207251548767,
|
|
"step": 3305,
|
|
"valid_targets_mean": 28848.0,
|
|
"valid_targets_min": 20950
|
|
},
|
|
{
|
|
"epoch": 3.5325506937033087,
|
|
"grad_norm": 0.3145847072932158,
|
|
"learning_rate": 0.00013700635641870923,
|
|
"loss": 0.4337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4317154288291931,
|
|
"step": 3310,
|
|
"valid_targets_mean": 28951.6,
|
|
"valid_targets_min": 22482
|
|
},
|
|
{
|
|
"epoch": 3.537886872998933,
|
|
"grad_norm": 0.3760485517392338,
|
|
"learning_rate": 0.00013678993849009313,
|
|
"loss": 0.4316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43668973445892334,
|
|
"step": 3315,
|
|
"valid_targets_mean": 28854.2,
|
|
"valid_targets_min": 20523
|
|
},
|
|
{
|
|
"epoch": 3.5432230522945574,
|
|
"grad_norm": 0.33028254222060127,
|
|
"learning_rate": 0.00013657332109223117,
|
|
"loss": 0.434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4305395781993866,
|
|
"step": 3320,
|
|
"valid_targets_mean": 28878.3,
|
|
"valid_targets_min": 21290
|
|
},
|
|
{
|
|
"epoch": 3.5485592315901813,
|
|
"grad_norm": 0.5105044696911828,
|
|
"learning_rate": 0.00013635650539958863,
|
|
"loss": 0.4328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4329110383987427,
|
|
"step": 3325,
|
|
"valid_targets_mean": 28852.5,
|
|
"valid_targets_min": 20940
|
|
},
|
|
{
|
|
"epoch": 3.5538954108858056,
|
|
"grad_norm": 0.44178193667691457,
|
|
"learning_rate": 0.00013613949258770608,
|
|
"loss": 0.4326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4327603578567505,
|
|
"step": 3330,
|
|
"valid_targets_mean": 28809.5,
|
|
"valid_targets_min": 22051
|
|
},
|
|
{
|
|
"epoch": 3.55923159018143,
|
|
"grad_norm": 0.4592497401974742,
|
|
"learning_rate": 0.00013592228383319273,
|
|
"loss": 0.4332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43314966559410095,
|
|
"step": 3335,
|
|
"valid_targets_mean": 28734.6,
|
|
"valid_targets_min": 21478
|
|
},
|
|
{
|
|
"epoch": 3.5645677694770543,
|
|
"grad_norm": 0.39984373491737396,
|
|
"learning_rate": 0.00013570488031372022,
|
|
"loss": 0.4331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4362226128578186,
|
|
"step": 3340,
|
|
"valid_targets_mean": 28880.2,
|
|
"valid_targets_min": 22299
|
|
},
|
|
{
|
|
"epoch": 3.5699039487726787,
|
|
"grad_norm": 0.42971496493126365,
|
|
"learning_rate": 0.00013548728320801612,
|
|
"loss": 0.4338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43295514583587646,
|
|
"step": 3345,
|
|
"valid_targets_mean": 28929.0,
|
|
"valid_targets_min": 23270
|
|
},
|
|
{
|
|
"epoch": 3.575240128068303,
|
|
"grad_norm": 0.46650323926697734,
|
|
"learning_rate": 0.00013526949369585764,
|
|
"loss": 0.4314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4373741149902344,
|
|
"step": 3350,
|
|
"valid_targets_mean": 29128.0,
|
|
"valid_targets_min": 22056
|
|
},
|
|
{
|
|
"epoch": 3.5805763073639274,
|
|
"grad_norm": 0.48225276813971074,
|
|
"learning_rate": 0.0001350515129580652,
|
|
"loss": 0.4318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43129345774650574,
|
|
"step": 3355,
|
|
"valid_targets_mean": 28706.7,
|
|
"valid_targets_min": 17828
|
|
},
|
|
{
|
|
"epoch": 3.5859124866595518,
|
|
"grad_norm": 0.4629045352920466,
|
|
"learning_rate": 0.000134833342176496,
|
|
"loss": 0.4337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4332507252693176,
|
|
"step": 3360,
|
|
"valid_targets_mean": 28870.7,
|
|
"valid_targets_min": 21083
|
|
},
|
|
{
|
|
"epoch": 3.591248665955176,
|
|
"grad_norm": 0.47206526382533237,
|
|
"learning_rate": 0.00013461498253403758,
|
|
"loss": 0.4337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4355852007865906,
|
|
"step": 3365,
|
|
"valid_targets_mean": 28735.8,
|
|
"valid_targets_min": 18046
|
|
},
|
|
{
|
|
"epoch": 3.5965848452508005,
|
|
"grad_norm": 0.4700787626748754,
|
|
"learning_rate": 0.00013439643521460153,
|
|
"loss": 0.43,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43339934945106506,
|
|
"step": 3370,
|
|
"valid_targets_mean": 28750.6,
|
|
"valid_targets_min": 19141
|
|
},
|
|
{
|
|
"epoch": 3.601921024546425,
|
|
"grad_norm": 0.43350780960940405,
|
|
"learning_rate": 0.00013417770140311693,
|
|
"loss": 0.4315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4338434338569641,
|
|
"step": 3375,
|
|
"valid_targets_mean": 28719.0,
|
|
"valid_targets_min": 19379
|
|
},
|
|
{
|
|
"epoch": 3.607257203842049,
|
|
"grad_norm": 0.4013004042105287,
|
|
"learning_rate": 0.000133958782285524,
|
|
"loss": 0.4304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4283779561519623,
|
|
"step": 3380,
|
|
"valid_targets_mean": 28811.4,
|
|
"valid_targets_min": 18503
|
|
},
|
|
{
|
|
"epoch": 3.6125933831376735,
|
|
"grad_norm": 0.5388393269800589,
|
|
"learning_rate": 0.00013373967904876775,
|
|
"loss": 0.4337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4304119050502777,
|
|
"step": 3385,
|
|
"valid_targets_mean": 29018.2,
|
|
"valid_targets_min": 21958
|
|
},
|
|
{
|
|
"epoch": 3.617929562433298,
|
|
"grad_norm": 0.4057390217196227,
|
|
"learning_rate": 0.00013352039288079133,
|
|
"loss": 0.4306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4319956600666046,
|
|
"step": 3390,
|
|
"valid_targets_mean": 28919.5,
|
|
"valid_targets_min": 22140
|
|
},
|
|
{
|
|
"epoch": 3.6232657417289222,
|
|
"grad_norm": 0.5368401785672807,
|
|
"learning_rate": 0.00013330092497052976,
|
|
"loss": 0.433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.433579683303833,
|
|
"step": 3395,
|
|
"valid_targets_mean": 28931.2,
|
|
"valid_targets_min": 19698
|
|
},
|
|
{
|
|
"epoch": 3.628601921024546,
|
|
"grad_norm": 0.36185761939563554,
|
|
"learning_rate": 0.00013308127650790352,
|
|
"loss": 0.4319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43094661831855774,
|
|
"step": 3400,
|
|
"valid_targets_mean": 28791.9,
|
|
"valid_targets_min": 18610
|
|
},
|
|
{
|
|
"epoch": 3.6339381003201705,
|
|
"grad_norm": 0.4425115168030993,
|
|
"learning_rate": 0.00013286144868381188,
|
|
"loss": 0.4325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4321828782558441,
|
|
"step": 3405,
|
|
"valid_targets_mean": 28859.5,
|
|
"valid_targets_min": 17625
|
|
},
|
|
{
|
|
"epoch": 3.639274279615795,
|
|
"grad_norm": 0.5914395427100793,
|
|
"learning_rate": 0.0001326414426901266,
|
|
"loss": 0.4321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4337488114833832,
|
|
"step": 3410,
|
|
"valid_targets_mean": 28888.0,
|
|
"valid_targets_min": 22467
|
|
},
|
|
{
|
|
"epoch": 3.644610458911419,
|
|
"grad_norm": 0.4870561175085104,
|
|
"learning_rate": 0.00013242125971968556,
|
|
"loss": 0.4321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42824339866638184,
|
|
"step": 3415,
|
|
"valid_targets_mean": 28959.8,
|
|
"valid_targets_min": 21801
|
|
},
|
|
{
|
|
"epoch": 3.6499466382070436,
|
|
"grad_norm": 0.44613044833777615,
|
|
"learning_rate": 0.00013220090096628616,
|
|
"loss": 0.4336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43413907289505005,
|
|
"step": 3420,
|
|
"valid_targets_mean": 28731.1,
|
|
"valid_targets_min": 21438
|
|
},
|
|
{
|
|
"epoch": 3.655282817502668,
|
|
"grad_norm": 0.4094552612110865,
|
|
"learning_rate": 0.00013198036762467868,
|
|
"loss": 0.432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4328404664993286,
|
|
"step": 3425,
|
|
"valid_targets_mean": 28759.0,
|
|
"valid_targets_min": 21885
|
|
},
|
|
{
|
|
"epoch": 3.6606189967982923,
|
|
"grad_norm": 0.4966656728086518,
|
|
"learning_rate": 0.00013175966089056025,
|
|
"loss": 0.4316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4348108172416687,
|
|
"step": 3430,
|
|
"valid_targets_mean": 28708.3,
|
|
"valid_targets_min": 22466
|
|
},
|
|
{
|
|
"epoch": 3.6659551760939166,
|
|
"grad_norm": 0.4369493068182657,
|
|
"learning_rate": 0.00013153878196056788,
|
|
"loss": 0.4304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4332895278930664,
|
|
"step": 3435,
|
|
"valid_targets_mean": 28939.6,
|
|
"valid_targets_min": 21070
|
|
},
|
|
{
|
|
"epoch": 3.671291355389541,
|
|
"grad_norm": 0.445549024199376,
|
|
"learning_rate": 0.00013131773203227238,
|
|
"loss": 0.4304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43516385555267334,
|
|
"step": 3440,
|
|
"valid_targets_mean": 28753.2,
|
|
"valid_targets_min": 22085
|
|
},
|
|
{
|
|
"epoch": 3.6766275346851653,
|
|
"grad_norm": 0.48475802506018867,
|
|
"learning_rate": 0.00013109651230417158,
|
|
"loss": 0.4295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42581403255462646,
|
|
"step": 3445,
|
|
"valid_targets_mean": 28848.2,
|
|
"valid_targets_min": 22898
|
|
},
|
|
{
|
|
"epoch": 3.6819637139807897,
|
|
"grad_norm": 0.41403337615437824,
|
|
"learning_rate": 0.000130875123975684,
|
|
"loss": 0.4321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4342615604400635,
|
|
"step": 3450,
|
|
"valid_targets_mean": 28758.9,
|
|
"valid_targets_min": 20344
|
|
},
|
|
{
|
|
"epoch": 3.687299893276414,
|
|
"grad_norm": 0.469971436134684,
|
|
"learning_rate": 0.00013065356824714218,
|
|
"loss": 0.4318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4311075210571289,
|
|
"step": 3455,
|
|
"valid_targets_mean": 28824.2,
|
|
"valid_targets_min": 20941
|
|
},
|
|
{
|
|
"epoch": 3.6926360725720384,
|
|
"grad_norm": 0.4678101527655715,
|
|
"learning_rate": 0.0001304318463197864,
|
|
"loss": 0.4303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42798611521720886,
|
|
"step": 3460,
|
|
"valid_targets_mean": 28862.6,
|
|
"valid_targets_min": 19023
|
|
},
|
|
{
|
|
"epoch": 3.6979722518676628,
|
|
"grad_norm": 0.408590337655623,
|
|
"learning_rate": 0.00013020995939575801,
|
|
"loss": 0.4293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4305270314216614,
|
|
"step": 3465,
|
|
"valid_targets_mean": 28937.8,
|
|
"valid_targets_min": 23337
|
|
},
|
|
{
|
|
"epoch": 3.703308431163287,
|
|
"grad_norm": 0.4656795617970675,
|
|
"learning_rate": 0.0001299879086780929,
|
|
"loss": 0.4286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4290810525417328,
|
|
"step": 3470,
|
|
"valid_targets_mean": 28860.1,
|
|
"valid_targets_min": 19674
|
|
},
|
|
{
|
|
"epoch": 3.7086446104589115,
|
|
"grad_norm": 0.5658992971599166,
|
|
"learning_rate": 0.000129765695370715,
|
|
"loss": 0.4305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4292992353439331,
|
|
"step": 3475,
|
|
"valid_targets_mean": 28804.4,
|
|
"valid_targets_min": 17714
|
|
},
|
|
{
|
|
"epoch": 3.713980789754536,
|
|
"grad_norm": 0.39311007426573547,
|
|
"learning_rate": 0.00012954332067842994,
|
|
"loss": 0.4308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42862972617149353,
|
|
"step": 3480,
|
|
"valid_targets_mean": 28798.6,
|
|
"valid_targets_min": 22104
|
|
},
|
|
{
|
|
"epoch": 3.71931696905016,
|
|
"grad_norm": 0.3542095011377809,
|
|
"learning_rate": 0.00012932078580691817,
|
|
"loss": 0.4325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4315662384033203,
|
|
"step": 3485,
|
|
"valid_targets_mean": 28961.7,
|
|
"valid_targets_min": 22930
|
|
},
|
|
{
|
|
"epoch": 3.7246531483457845,
|
|
"grad_norm": 0.4028606369164767,
|
|
"learning_rate": 0.00012909809196272865,
|
|
"loss": 0.4289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42671799659729004,
|
|
"step": 3490,
|
|
"valid_targets_mean": 28998.3,
|
|
"valid_targets_min": 21922
|
|
},
|
|
{
|
|
"epoch": 3.729989327641409,
|
|
"grad_norm": 0.44110145771370024,
|
|
"learning_rate": 0.00012887524035327233,
|
|
"loss": 0.4309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4318047761917114,
|
|
"step": 3495,
|
|
"valid_targets_mean": 28855.4,
|
|
"valid_targets_min": 23180
|
|
},
|
|
{
|
|
"epoch": 3.7353255069370332,
|
|
"grad_norm": 0.4445599952668899,
|
|
"learning_rate": 0.00012865223218681555,
|
|
"loss": 0.4316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4287042021751404,
|
|
"step": 3500,
|
|
"valid_targets_mean": 28774.2,
|
|
"valid_targets_min": 22317
|
|
},
|
|
{
|
|
"epoch": 3.7406616862326576,
|
|
"grad_norm": 0.4660901172079049,
|
|
"learning_rate": 0.0001284290686724733,
|
|
"loss": 0.4278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42891907691955566,
|
|
"step": 3505,
|
|
"valid_targets_mean": 28782.6,
|
|
"valid_targets_min": 19569
|
|
},
|
|
{
|
|
"epoch": 3.745997865528282,
|
|
"grad_norm": 0.39478144276395094,
|
|
"learning_rate": 0.00012820575102020308,
|
|
"loss": 0.4291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42791077494621277,
|
|
"step": 3510,
|
|
"valid_targets_mean": 28934.6,
|
|
"valid_targets_min": 21724
|
|
},
|
|
{
|
|
"epoch": 3.7513340448239063,
|
|
"grad_norm": 0.4498652413048794,
|
|
"learning_rate": 0.00012798228044079795,
|
|
"loss": 0.4301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4289936423301697,
|
|
"step": 3515,
|
|
"valid_targets_mean": 28785.5,
|
|
"valid_targets_min": 22540
|
|
},
|
|
{
|
|
"epoch": 3.7566702241195307,
|
|
"grad_norm": 0.37725205122505695,
|
|
"learning_rate": 0.0001277586581458802,
|
|
"loss": 0.4281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42873474955558777,
|
|
"step": 3520,
|
|
"valid_targets_mean": 28793.5,
|
|
"valid_targets_min": 20282
|
|
},
|
|
{
|
|
"epoch": 3.762006403415155,
|
|
"grad_norm": 0.3851394073217626,
|
|
"learning_rate": 0.00012753488534789458,
|
|
"loss": 0.429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4283927083015442,
|
|
"step": 3525,
|
|
"valid_targets_mean": 28736.1,
|
|
"valid_targets_min": 20171
|
|
},
|
|
{
|
|
"epoch": 3.767342582710779,
|
|
"grad_norm": 0.3892498423056188,
|
|
"learning_rate": 0.00012731096326010194,
|
|
"loss": 0.4314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4264993667602539,
|
|
"step": 3530,
|
|
"valid_targets_mean": 28871.7,
|
|
"valid_targets_min": 20214
|
|
},
|
|
{
|
|
"epoch": 3.7726787620064033,
|
|
"grad_norm": 0.37455931834224676,
|
|
"learning_rate": 0.00012708689309657255,
|
|
"loss": 0.4322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42934340238571167,
|
|
"step": 3535,
|
|
"valid_targets_mean": 28909.0,
|
|
"valid_targets_min": 17573
|
|
},
|
|
{
|
|
"epoch": 3.7780149413020276,
|
|
"grad_norm": 0.4683631982078189,
|
|
"learning_rate": 0.00012686267607217954,
|
|
"loss": 0.4277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4246726632118225,
|
|
"step": 3540,
|
|
"valid_targets_mean": 28774.7,
|
|
"valid_targets_min": 17779
|
|
},
|
|
{
|
|
"epoch": 3.783351120597652,
|
|
"grad_norm": 0.3621086551412537,
|
|
"learning_rate": 0.00012663831340259222,
|
|
"loss": 0.4307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.425648033618927,
|
|
"step": 3545,
|
|
"valid_targets_mean": 28843.8,
|
|
"valid_targets_min": 22373
|
|
},
|
|
{
|
|
"epoch": 3.7886872998932764,
|
|
"grad_norm": 0.4451990706228817,
|
|
"learning_rate": 0.00012641380630426963,
|
|
"loss": 0.4307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4279008209705353,
|
|
"step": 3550,
|
|
"valid_targets_mean": 28949.8,
|
|
"valid_targets_min": 22022
|
|
},
|
|
{
|
|
"epoch": 3.7940234791889007,
|
|
"grad_norm": 0.4288991779222557,
|
|
"learning_rate": 0.00012618915599445387,
|
|
"loss": 0.4282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42941054701805115,
|
|
"step": 3555,
|
|
"valid_targets_mean": 28808.6,
|
|
"valid_targets_min": 17481
|
|
},
|
|
{
|
|
"epoch": 3.799359658484525,
|
|
"grad_norm": 0.39144371757232044,
|
|
"learning_rate": 0.00012596436369116348,
|
|
"loss": 0.4291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4281049966812134,
|
|
"step": 3560,
|
|
"valid_targets_mean": 28940.1,
|
|
"valid_targets_min": 22951
|
|
},
|
|
{
|
|
"epoch": 3.8046958377801494,
|
|
"grad_norm": 0.34309843042840693,
|
|
"learning_rate": 0.00012573943061318686,
|
|
"loss": 0.4276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4261920750141144,
|
|
"step": 3565,
|
|
"valid_targets_mean": 28768.5,
|
|
"valid_targets_min": 18305
|
|
},
|
|
{
|
|
"epoch": 3.8100320170757738,
|
|
"grad_norm": 0.42140784731783154,
|
|
"learning_rate": 0.00012551435798007572,
|
|
"loss": 0.4285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4318602383136749,
|
|
"step": 3570,
|
|
"valid_targets_mean": 28735.1,
|
|
"valid_targets_min": 21980
|
|
},
|
|
{
|
|
"epoch": 3.815368196371398,
|
|
"grad_norm": 0.5214249498817489,
|
|
"learning_rate": 0.0001252891470121384,
|
|
"loss": 0.4298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4317033886909485,
|
|
"step": 3575,
|
|
"valid_targets_mean": 29068.1,
|
|
"valid_targets_min": 22748
|
|
},
|
|
{
|
|
"epoch": 3.8207043756670225,
|
|
"grad_norm": 0.47742449376075385,
|
|
"learning_rate": 0.00012506379893043328,
|
|
"loss": 0.4302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43194687366485596,
|
|
"step": 3580,
|
|
"valid_targets_mean": 28783.8,
|
|
"valid_targets_min": 21027
|
|
},
|
|
{
|
|
"epoch": 3.826040554962647,
|
|
"grad_norm": 0.467160743712367,
|
|
"learning_rate": 0.00012483831495676206,
|
|
"loss": 0.4294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42947202920913696,
|
|
"step": 3585,
|
|
"valid_targets_mean": 28860.5,
|
|
"valid_targets_min": 23224
|
|
},
|
|
{
|
|
"epoch": 3.831376734258271,
|
|
"grad_norm": 0.4232740930262326,
|
|
"learning_rate": 0.00012461269631366338,
|
|
"loss": 0.4294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4264180064201355,
|
|
"step": 3590,
|
|
"valid_targets_mean": 28832.7,
|
|
"valid_targets_min": 16812
|
|
},
|
|
{
|
|
"epoch": 3.8367129135538955,
|
|
"grad_norm": 0.4080318700702358,
|
|
"learning_rate": 0.00012438694422440587,
|
|
"loss": 0.4293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.426061749458313,
|
|
"step": 3595,
|
|
"valid_targets_mean": 28853.5,
|
|
"valid_targets_min": 18470
|
|
},
|
|
{
|
|
"epoch": 3.84204909284952,
|
|
"grad_norm": 0.48170988188457003,
|
|
"learning_rate": 0.0001241610599129818,
|
|
"loss": 0.429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4270153045654297,
|
|
"step": 3600,
|
|
"valid_targets_mean": 28885.4,
|
|
"valid_targets_min": 21015
|
|
},
|
|
{
|
|
"epoch": 3.847385272145144,
|
|
"grad_norm": 0.3693315233172458,
|
|
"learning_rate": 0.0001239350446041003,
|
|
"loss": 0.4291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4311561584472656,
|
|
"step": 3605,
|
|
"valid_targets_mean": 28833.6,
|
|
"valid_targets_min": 23388
|
|
},
|
|
{
|
|
"epoch": 3.852721451440768,
|
|
"grad_norm": 0.3484926805840226,
|
|
"learning_rate": 0.00012370889952318076,
|
|
"loss": 0.4289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42880764603614807,
|
|
"step": 3610,
|
|
"valid_targets_mean": 28830.9,
|
|
"valid_targets_min": 21580
|
|
},
|
|
{
|
|
"epoch": 3.8580576307363925,
|
|
"grad_norm": 0.4335923435807277,
|
|
"learning_rate": 0.0001234826258963461,
|
|
"loss": 0.4289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42800524830818176,
|
|
"step": 3615,
|
|
"valid_targets_mean": 28794.4,
|
|
"valid_targets_min": 21666
|
|
},
|
|
{
|
|
"epoch": 3.863393810032017,
|
|
"grad_norm": 0.40222107492117587,
|
|
"learning_rate": 0.00012325622495041626,
|
|
"loss": 0.4304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4281304180622101,
|
|
"step": 3620,
|
|
"valid_targets_mean": 28763.7,
|
|
"valid_targets_min": 20819
|
|
},
|
|
{
|
|
"epoch": 3.8687299893276412,
|
|
"grad_norm": 0.3666339498052566,
|
|
"learning_rate": 0.00012302969791290152,
|
|
"loss": 0.4273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42554640769958496,
|
|
"step": 3625,
|
|
"valid_targets_mean": 28769.5,
|
|
"valid_targets_min": 17798
|
|
},
|
|
{
|
|
"epoch": 3.8740661686232656,
|
|
"grad_norm": 0.3378606634579012,
|
|
"learning_rate": 0.00012280304601199567,
|
|
"loss": 0.43,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4291246235370636,
|
|
"step": 3630,
|
|
"valid_targets_mean": 28854.0,
|
|
"valid_targets_min": 19509
|
|
},
|
|
{
|
|
"epoch": 3.87940234791889,
|
|
"grad_norm": 0.479120271244163,
|
|
"learning_rate": 0.0001225762704765697,
|
|
"loss": 0.4295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42979249358177185,
|
|
"step": 3635,
|
|
"valid_targets_mean": 28731.5,
|
|
"valid_targets_min": 21401
|
|
},
|
|
{
|
|
"epoch": 3.8847385272145143,
|
|
"grad_norm": 0.3862382523616369,
|
|
"learning_rate": 0.0001223493725361647,
|
|
"loss": 0.4283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42928624153137207,
|
|
"step": 3640,
|
|
"valid_targets_mean": 28895.1,
|
|
"valid_targets_min": 21913
|
|
},
|
|
{
|
|
"epoch": 3.8900747065101386,
|
|
"grad_norm": 0.42703226920414883,
|
|
"learning_rate": 0.00012212235342098563,
|
|
"loss": 0.429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42561978101730347,
|
|
"step": 3645,
|
|
"valid_targets_mean": 28699.3,
|
|
"valid_targets_min": 22699
|
|
},
|
|
{
|
|
"epoch": 3.895410885805763,
|
|
"grad_norm": 0.36739046228569433,
|
|
"learning_rate": 0.00012189521436189427,
|
|
"loss": 0.4274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4263993799686432,
|
|
"step": 3650,
|
|
"valid_targets_mean": 28909.2,
|
|
"valid_targets_min": 18879
|
|
},
|
|
{
|
|
"epoch": 3.9007470651013874,
|
|
"grad_norm": 0.3442698921561772,
|
|
"learning_rate": 0.00012166795659040278,
|
|
"loss": 0.429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4265468716621399,
|
|
"step": 3655,
|
|
"valid_targets_mean": 28808.9,
|
|
"valid_targets_min": 20693
|
|
},
|
|
{
|
|
"epoch": 3.9060832443970117,
|
|
"grad_norm": 0.3491683713142071,
|
|
"learning_rate": 0.00012144058133866696,
|
|
"loss": 0.4281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.427855908870697,
|
|
"step": 3660,
|
|
"valid_targets_mean": 28925.4,
|
|
"valid_targets_min": 22296
|
|
},
|
|
{
|
|
"epoch": 3.911419423692636,
|
|
"grad_norm": 0.39679933829431024,
|
|
"learning_rate": 0.00012121308983947963,
|
|
"loss": 0.4296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.430743932723999,
|
|
"step": 3665,
|
|
"valid_targets_mean": 28823.0,
|
|
"valid_targets_min": 19558
|
|
},
|
|
{
|
|
"epoch": 3.9167556029882604,
|
|
"grad_norm": 0.3808937270514705,
|
|
"learning_rate": 0.00012098548332626383,
|
|
"loss": 0.4278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42767736315727234,
|
|
"step": 3670,
|
|
"valid_targets_mean": 28807.6,
|
|
"valid_targets_min": 19538
|
|
},
|
|
{
|
|
"epoch": 3.9220917822838848,
|
|
"grad_norm": 0.4126907832167869,
|
|
"learning_rate": 0.00012075776303306612,
|
|
"loss": 0.428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42879045009613037,
|
|
"step": 3675,
|
|
"valid_targets_mean": 28829.6,
|
|
"valid_targets_min": 20457
|
|
},
|
|
{
|
|
"epoch": 3.927427961579509,
|
|
"grad_norm": 0.40542244391196997,
|
|
"learning_rate": 0.00012052993019455007,
|
|
"loss": 0.4252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4236496686935425,
|
|
"step": 3680,
|
|
"valid_targets_mean": 28944.2,
|
|
"valid_targets_min": 20050
|
|
},
|
|
{
|
|
"epoch": 3.9327641408751335,
|
|
"grad_norm": 0.43489706446937715,
|
|
"learning_rate": 0.00012030198604598943,
|
|
"loss": 0.4267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42238980531692505,
|
|
"step": 3685,
|
|
"valid_targets_mean": 28980.7,
|
|
"valid_targets_min": 21260
|
|
},
|
|
{
|
|
"epoch": 3.938100320170758,
|
|
"grad_norm": 0.4574175498837884,
|
|
"learning_rate": 0.00012007393182326136,
|
|
"loss": 0.4268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4301767945289612,
|
|
"step": 3690,
|
|
"valid_targets_mean": 28830.7,
|
|
"valid_targets_min": 20032
|
|
},
|
|
{
|
|
"epoch": 3.943436499466382,
|
|
"grad_norm": 0.5302303603287535,
|
|
"learning_rate": 0.00011984576876284,
|
|
"loss": 0.4276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4286668300628662,
|
|
"step": 3695,
|
|
"valid_targets_mean": 28710.5,
|
|
"valid_targets_min": 23020
|
|
},
|
|
{
|
|
"epoch": 3.9487726787620065,
|
|
"grad_norm": 0.42762805509272644,
|
|
"learning_rate": 0.00011961749810178943,
|
|
"loss": 0.4272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42783212661743164,
|
|
"step": 3700,
|
|
"valid_targets_mean": 28913.8,
|
|
"valid_targets_min": 21591
|
|
},
|
|
{
|
|
"epoch": 3.954108858057631,
|
|
"grad_norm": 0.3691856294166198,
|
|
"learning_rate": 0.00011938912107775717,
|
|
"loss": 0.4273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43037647008895874,
|
|
"step": 3705,
|
|
"valid_targets_mean": 29041.2,
|
|
"valid_targets_min": 23615
|
|
},
|
|
{
|
|
"epoch": 3.9594450373532553,
|
|
"grad_norm": 0.38810841873282675,
|
|
"learning_rate": 0.00011916063892896748,
|
|
"loss": 0.428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42605531215667725,
|
|
"step": 3710,
|
|
"valid_targets_mean": 28782.8,
|
|
"valid_targets_min": 19439
|
|
},
|
|
{
|
|
"epoch": 3.9647812166488796,
|
|
"grad_norm": 0.39822305684399667,
|
|
"learning_rate": 0.00011893205289421451,
|
|
"loss": 0.4277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42825180292129517,
|
|
"step": 3715,
|
|
"valid_targets_mean": 28692.7,
|
|
"valid_targets_min": 20633
|
|
},
|
|
{
|
|
"epoch": 3.970117395944504,
|
|
"grad_norm": 0.4158264081833239,
|
|
"learning_rate": 0.00011870336421285571,
|
|
"loss": 0.4286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4284394383430481,
|
|
"step": 3720,
|
|
"valid_targets_mean": 28782.1,
|
|
"valid_targets_min": 22529
|
|
},
|
|
{
|
|
"epoch": 3.9754535752401283,
|
|
"grad_norm": 0.3831811007794275,
|
|
"learning_rate": 0.00011847457412480505,
|
|
"loss": 0.426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42615097761154175,
|
|
"step": 3725,
|
|
"valid_targets_mean": 28755.5,
|
|
"valid_targets_min": 19295
|
|
},
|
|
{
|
|
"epoch": 3.9807897545357527,
|
|
"grad_norm": 0.4322480562579156,
|
|
"learning_rate": 0.00011824568387052637,
|
|
"loss": 0.4288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4323875904083252,
|
|
"step": 3730,
|
|
"valid_targets_mean": 28810.8,
|
|
"valid_targets_min": 19214
|
|
},
|
|
{
|
|
"epoch": 3.9861259338313766,
|
|
"grad_norm": 0.4694712272117525,
|
|
"learning_rate": 0.00011801669469102643,
|
|
"loss": 0.427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42549771070480347,
|
|
"step": 3735,
|
|
"valid_targets_mean": 28917.2,
|
|
"valid_targets_min": 22918
|
|
},
|
|
{
|
|
"epoch": 3.991462113127001,
|
|
"grad_norm": 0.3824415387016526,
|
|
"learning_rate": 0.00011778760782784855,
|
|
"loss": 0.4273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4290057122707367,
|
|
"step": 3740,
|
|
"valid_targets_mean": 28810.4,
|
|
"valid_targets_min": 19733
|
|
},
|
|
{
|
|
"epoch": 3.9967982924226253,
|
|
"grad_norm": 0.40822041699163486,
|
|
"learning_rate": 0.00011755842452306556,
|
|
"loss": 0.4273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42833641171455383,
|
|
"step": 3745,
|
|
"valid_targets_mean": 28969.8,
|
|
"valid_targets_min": 19636
|
|
},
|
|
{
|
|
"epoch": 4.00213447171825,
|
|
"grad_norm": 0.43314627908205433,
|
|
"learning_rate": 0.00011732914601927318,
|
|
"loss": 0.4239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4214213490486145,
|
|
"step": 3750,
|
|
"valid_targets_mean": 28785.9,
|
|
"valid_targets_min": 18581
|
|
},
|
|
{
|
|
"epoch": 4.0074706510138745,
|
|
"grad_norm": 0.4918842262705215,
|
|
"learning_rate": 0.00011709977355958333,
|
|
"loss": 0.4218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4230083227157593,
|
|
"step": 3755,
|
|
"valid_targets_mean": 29002.1,
|
|
"valid_targets_min": 22426
|
|
},
|
|
{
|
|
"epoch": 4.012806830309499,
|
|
"grad_norm": 0.3559040975229061,
|
|
"learning_rate": 0.00011687030838761732,
|
|
"loss": 0.4232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42185312509536743,
|
|
"step": 3760,
|
|
"valid_targets_mean": 28564.4,
|
|
"valid_targets_min": 18666
|
|
},
|
|
{
|
|
"epoch": 4.018143009605123,
|
|
"grad_norm": 0.3642085963533532,
|
|
"learning_rate": 0.00011664075174749911,
|
|
"loss": 0.4231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4223750829696655,
|
|
"step": 3765,
|
|
"valid_targets_mean": 28891.1,
|
|
"valid_targets_min": 23061
|
|
},
|
|
{
|
|
"epoch": 4.023479188900747,
|
|
"grad_norm": 0.3704274289489233,
|
|
"learning_rate": 0.00011641110488384866,
|
|
"loss": 0.4239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4247993528842926,
|
|
"step": 3770,
|
|
"valid_targets_mean": 28590.5,
|
|
"valid_targets_min": 19213
|
|
},
|
|
{
|
|
"epoch": 4.028815368196371,
|
|
"grad_norm": 0.3455884907079301,
|
|
"learning_rate": 0.00011618136904177501,
|
|
"loss": 0.4225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4216018319129944,
|
|
"step": 3775,
|
|
"valid_targets_mean": 28903.8,
|
|
"valid_targets_min": 22329
|
|
},
|
|
{
|
|
"epoch": 4.034151547491995,
|
|
"grad_norm": 0.3701656356814854,
|
|
"learning_rate": 0.00011595154546686969,
|
|
"loss": 0.4236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4252583086490631,
|
|
"step": 3780,
|
|
"valid_targets_mean": 28877.5,
|
|
"valid_targets_min": 22872
|
|
},
|
|
{
|
|
"epoch": 4.03948772678762,
|
|
"grad_norm": 0.3552312372354896,
|
|
"learning_rate": 0.00011572163540519986,
|
|
"loss": 0.4228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4176941514015198,
|
|
"step": 3785,
|
|
"valid_targets_mean": 28807.6,
|
|
"valid_targets_min": 20174
|
|
},
|
|
{
|
|
"epoch": 4.044823906083244,
|
|
"grad_norm": 0.41540332012123204,
|
|
"learning_rate": 0.00011549164010330171,
|
|
"loss": 0.4227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42301157116889954,
|
|
"step": 3790,
|
|
"valid_targets_mean": 28687.2,
|
|
"valid_targets_min": 20284
|
|
},
|
|
{
|
|
"epoch": 4.050160085378868,
|
|
"grad_norm": 0.4493140408504873,
|
|
"learning_rate": 0.00011526156080817341,
|
|
"loss": 0.4239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4221842885017395,
|
|
"step": 3795,
|
|
"valid_targets_mean": 28992.8,
|
|
"valid_targets_min": 21441
|
|
},
|
|
{
|
|
"epoch": 4.055496264674493,
|
|
"grad_norm": 0.38540924658447634,
|
|
"learning_rate": 0.00011503139876726863,
|
|
"loss": 0.4248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42516863346099854,
|
|
"step": 3800,
|
|
"valid_targets_mean": 28847.5,
|
|
"valid_targets_min": 20921
|
|
},
|
|
{
|
|
"epoch": 4.060832443970117,
|
|
"grad_norm": 0.3325527893251533,
|
|
"learning_rate": 0.00011480115522848973,
|
|
"loss": 0.4244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4216970205307007,
|
|
"step": 3805,
|
|
"valid_targets_mean": 28792.4,
|
|
"valid_targets_min": 19147
|
|
},
|
|
{
|
|
"epoch": 4.0661686232657415,
|
|
"grad_norm": 0.364134580511168,
|
|
"learning_rate": 0.00011457083144018083,
|
|
"loss": 0.422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4217662215232849,
|
|
"step": 3810,
|
|
"valid_targets_mean": 28953.0,
|
|
"valid_targets_min": 21609
|
|
},
|
|
{
|
|
"epoch": 4.071504802561366,
|
|
"grad_norm": 0.3685373613484052,
|
|
"learning_rate": 0.0001143404286511212,
|
|
"loss": 0.4231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42246201634407043,
|
|
"step": 3815,
|
|
"valid_targets_mean": 29024.4,
|
|
"valid_targets_min": 22957
|
|
},
|
|
{
|
|
"epoch": 4.07684098185699,
|
|
"grad_norm": 0.4020308844754011,
|
|
"learning_rate": 0.00011410994811051847,
|
|
"loss": 0.4214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42001721262931824,
|
|
"step": 3820,
|
|
"valid_targets_mean": 28833.5,
|
|
"valid_targets_min": 22585
|
|
},
|
|
{
|
|
"epoch": 4.0821771611526145,
|
|
"grad_norm": 0.42478665479006933,
|
|
"learning_rate": 0.00011387939106800176,
|
|
"loss": 0.4213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4186967611312866,
|
|
"step": 3825,
|
|
"valid_targets_mean": 28579.3,
|
|
"valid_targets_min": 20000
|
|
},
|
|
{
|
|
"epoch": 4.087513340448239,
|
|
"grad_norm": 0.39595288583207905,
|
|
"learning_rate": 0.00011364875877361496,
|
|
"loss": 0.4225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4193423390388489,
|
|
"step": 3830,
|
|
"valid_targets_mean": 28737.9,
|
|
"valid_targets_min": 22925
|
|
},
|
|
{
|
|
"epoch": 4.092849519743863,
|
|
"grad_norm": 0.4172708937499721,
|
|
"learning_rate": 0.00011341805247781008,
|
|
"loss": 0.4204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.424191415309906,
|
|
"step": 3835,
|
|
"valid_targets_mean": 28890.9,
|
|
"valid_targets_min": 23193
|
|
},
|
|
{
|
|
"epoch": 4.098185699039488,
|
|
"grad_norm": 0.3468813838294797,
|
|
"learning_rate": 0.00011318727343144026,
|
|
"loss": 0.4233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4230159819126129,
|
|
"step": 3840,
|
|
"valid_targets_mean": 28757.8,
|
|
"valid_targets_min": 22532
|
|
},
|
|
{
|
|
"epoch": 4.103521878335112,
|
|
"grad_norm": 0.3356951709404877,
|
|
"learning_rate": 0.00011295642288575307,
|
|
"loss": 0.4219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4207121729850769,
|
|
"step": 3845,
|
|
"valid_targets_mean": 28780.7,
|
|
"valid_targets_min": 19833
|
|
},
|
|
{
|
|
"epoch": 4.108858057630736,
|
|
"grad_norm": 0.39019958866424675,
|
|
"learning_rate": 0.00011272550209238387,
|
|
"loss": 0.4215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41962960362434387,
|
|
"step": 3850,
|
|
"valid_targets_mean": 28784.2,
|
|
"valid_targets_min": 22929
|
|
},
|
|
{
|
|
"epoch": 4.114194236926361,
|
|
"grad_norm": 0.4065746126214435,
|
|
"learning_rate": 0.00011249451230334867,
|
|
"loss": 0.4213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4226855933666229,
|
|
"step": 3855,
|
|
"valid_targets_mean": 28658.2,
|
|
"valid_targets_min": 21102
|
|
},
|
|
{
|
|
"epoch": 4.119530416221985,
|
|
"grad_norm": 0.3479054849722043,
|
|
"learning_rate": 0.00011226345477103773,
|
|
"loss": 0.4215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4219398498535156,
|
|
"step": 3860,
|
|
"valid_targets_mean": 28897.2,
|
|
"valid_targets_min": 23308
|
|
},
|
|
{
|
|
"epoch": 4.124866595517609,
|
|
"grad_norm": 0.348931221614668,
|
|
"learning_rate": 0.00011203233074820858,
|
|
"loss": 0.4216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41948723793029785,
|
|
"step": 3865,
|
|
"valid_targets_mean": 28637.0,
|
|
"valid_targets_min": 19468
|
|
},
|
|
{
|
|
"epoch": 4.130202774813234,
|
|
"grad_norm": 0.36593684546771144,
|
|
"learning_rate": 0.00011180114148797922,
|
|
"loss": 0.4237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42400026321411133,
|
|
"step": 3870,
|
|
"valid_targets_mean": 28747.5,
|
|
"valid_targets_min": 15351
|
|
},
|
|
{
|
|
"epoch": 4.135538954108858,
|
|
"grad_norm": 0.4094998797537509,
|
|
"learning_rate": 0.00011156988824382135,
|
|
"loss": 0.4228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4267050623893738,
|
|
"step": 3875,
|
|
"valid_targets_mean": 28884.7,
|
|
"valid_targets_min": 20049
|
|
},
|
|
{
|
|
"epoch": 4.140875133404482,
|
|
"grad_norm": 0.42888137632041157,
|
|
"learning_rate": 0.00011133857226955364,
|
|
"loss": 0.4209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42192575335502625,
|
|
"step": 3880,
|
|
"valid_targets_mean": 28725.5,
|
|
"valid_targets_min": 22448
|
|
},
|
|
{
|
|
"epoch": 4.146211312700107,
|
|
"grad_norm": 0.3826298745496494,
|
|
"learning_rate": 0.00011110719481933481,
|
|
"loss": 0.4235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4205487072467804,
|
|
"step": 3885,
|
|
"valid_targets_mean": 28963.6,
|
|
"valid_targets_min": 20484
|
|
},
|
|
{
|
|
"epoch": 4.151547491995731,
|
|
"grad_norm": 0.34768145283571444,
|
|
"learning_rate": 0.00011087575714765688,
|
|
"loss": 0.4234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42684638500213623,
|
|
"step": 3890,
|
|
"valid_targets_mean": 28793.0,
|
|
"valid_targets_min": 20882
|
|
},
|
|
{
|
|
"epoch": 4.1568836712913555,
|
|
"grad_norm": 0.4514800651060072,
|
|
"learning_rate": 0.00011064426050933847,
|
|
"loss": 0.4217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4261409044265747,
|
|
"step": 3895,
|
|
"valid_targets_mean": 28916.7,
|
|
"valid_targets_min": 20010
|
|
},
|
|
{
|
|
"epoch": 4.16221985058698,
|
|
"grad_norm": 0.33804355302083333,
|
|
"learning_rate": 0.00011041270615951782,
|
|
"loss": 0.4223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4239078164100647,
|
|
"step": 3900,
|
|
"valid_targets_mean": 28696.5,
|
|
"valid_targets_min": 21884
|
|
},
|
|
{
|
|
"epoch": 4.167556029882604,
|
|
"grad_norm": 0.4073212141419539,
|
|
"learning_rate": 0.00011018109535364613,
|
|
"loss": 0.421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42531710863113403,
|
|
"step": 3905,
|
|
"valid_targets_mean": 28859.9,
|
|
"valid_targets_min": 21850
|
|
},
|
|
{
|
|
"epoch": 4.172892209178229,
|
|
"grad_norm": 0.3976136524251578,
|
|
"learning_rate": 0.00010994942934748065,
|
|
"loss": 0.4211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42030707001686096,
|
|
"step": 3910,
|
|
"valid_targets_mean": 28898.3,
|
|
"valid_targets_min": 20825
|
|
},
|
|
{
|
|
"epoch": 4.178228388473853,
|
|
"grad_norm": 0.42144827688365827,
|
|
"learning_rate": 0.00010971770939707795,
|
|
"loss": 0.422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42350465059280396,
|
|
"step": 3915,
|
|
"valid_targets_mean": 28714.0,
|
|
"valid_targets_min": 20705
|
|
},
|
|
{
|
|
"epoch": 4.183564567769477,
|
|
"grad_norm": 0.3446658933506483,
|
|
"learning_rate": 0.00010948593675878706,
|
|
"loss": 0.4221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.424017995595932,
|
|
"step": 3920,
|
|
"valid_targets_mean": 28684.7,
|
|
"valid_targets_min": 21850
|
|
},
|
|
{
|
|
"epoch": 4.188900747065102,
|
|
"grad_norm": 0.4424627391052105,
|
|
"learning_rate": 0.00010925411268924268,
|
|
"loss": 0.4205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4225308299064636,
|
|
"step": 3925,
|
|
"valid_targets_mean": 28854.1,
|
|
"valid_targets_min": 22507
|
|
},
|
|
{
|
|
"epoch": 4.194236926360726,
|
|
"grad_norm": 0.37456413232780433,
|
|
"learning_rate": 0.00010902223844535839,
|
|
"loss": 0.4224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41586148738861084,
|
|
"step": 3930,
|
|
"valid_targets_mean": 28878.6,
|
|
"valid_targets_min": 21420
|
|
},
|
|
{
|
|
"epoch": 4.19957310565635,
|
|
"grad_norm": 0.3916860861831356,
|
|
"learning_rate": 0.00010879031528431975,
|
|
"loss": 0.42,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41759392619132996,
|
|
"step": 3935,
|
|
"valid_targets_mean": 28883.2,
|
|
"valid_targets_min": 22592
|
|
},
|
|
{
|
|
"epoch": 4.204909284951975,
|
|
"grad_norm": 0.37570024791802914,
|
|
"learning_rate": 0.0001085583444635776,
|
|
"loss": 0.423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.424510657787323,
|
|
"step": 3940,
|
|
"valid_targets_mean": 28893.8,
|
|
"valid_targets_min": 21433
|
|
},
|
|
{
|
|
"epoch": 4.210245464247599,
|
|
"grad_norm": 0.39543235368925594,
|
|
"learning_rate": 0.00010832632724084113,
|
|
"loss": 0.4217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41936933994293213,
|
|
"step": 3945,
|
|
"valid_targets_mean": 29016.8,
|
|
"valid_targets_min": 18748
|
|
},
|
|
{
|
|
"epoch": 4.215581643543223,
|
|
"grad_norm": 0.38859752670036407,
|
|
"learning_rate": 0.00010809426487407115,
|
|
"loss": 0.419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41779953241348267,
|
|
"step": 3950,
|
|
"valid_targets_mean": 28779.7,
|
|
"valid_targets_min": 15149
|
|
},
|
|
{
|
|
"epoch": 4.220917822838848,
|
|
"grad_norm": 0.3868922043328974,
|
|
"learning_rate": 0.0001078621586214732,
|
|
"loss": 0.4225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42176300287246704,
|
|
"step": 3955,
|
|
"valid_targets_mean": 28857.1,
|
|
"valid_targets_min": 21853
|
|
},
|
|
{
|
|
"epoch": 4.226254002134472,
|
|
"grad_norm": 0.31184443878340706,
|
|
"learning_rate": 0.00010763000974149084,
|
|
"loss": 0.4213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42075514793395996,
|
|
"step": 3960,
|
|
"valid_targets_mean": 28865.0,
|
|
"valid_targets_min": 20290
|
|
},
|
|
{
|
|
"epoch": 4.2315901814300965,
|
|
"grad_norm": 0.380274975601504,
|
|
"learning_rate": 0.00010739781949279865,
|
|
"loss": 0.4204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41909322142601013,
|
|
"step": 3965,
|
|
"valid_targets_mean": 28894.3,
|
|
"valid_targets_min": 21058
|
|
},
|
|
{
|
|
"epoch": 4.23692636072572,
|
|
"grad_norm": 0.4121484134744739,
|
|
"learning_rate": 0.00010716558913429557,
|
|
"loss": 0.4222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4200228452682495,
|
|
"step": 3970,
|
|
"valid_targets_mean": 28861.2,
|
|
"valid_targets_min": 21194
|
|
},
|
|
{
|
|
"epoch": 4.242262540021345,
|
|
"grad_norm": 0.4097411357038756,
|
|
"learning_rate": 0.00010693331992509801,
|
|
"loss": 0.4219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4192362427711487,
|
|
"step": 3975,
|
|
"valid_targets_mean": 28831.3,
|
|
"valid_targets_min": 21057
|
|
},
|
|
{
|
|
"epoch": 4.247598719316969,
|
|
"grad_norm": 0.4190985418435361,
|
|
"learning_rate": 0.00010670101312453293,
|
|
"loss": 0.4215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42083290219306946,
|
|
"step": 3980,
|
|
"valid_targets_mean": 28756.5,
|
|
"valid_targets_min": 21893
|
|
},
|
|
{
|
|
"epoch": 4.252934898612593,
|
|
"grad_norm": 0.4786577449254182,
|
|
"learning_rate": 0.00010646866999213123,
|
|
"loss": 0.4203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4252513647079468,
|
|
"step": 3985,
|
|
"valid_targets_mean": 28908.6,
|
|
"valid_targets_min": 20296
|
|
},
|
|
{
|
|
"epoch": 4.258271077908217,
|
|
"grad_norm": 0.3489029085337842,
|
|
"learning_rate": 0.00010623629178762073,
|
|
"loss": 0.4218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4199264645576477,
|
|
"step": 3990,
|
|
"valid_targets_mean": 28804.0,
|
|
"valid_targets_min": 20802
|
|
},
|
|
{
|
|
"epoch": 4.263607257203842,
|
|
"grad_norm": 0.4604933163875057,
|
|
"learning_rate": 0.00010600387977091944,
|
|
"loss": 0.4217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4209303557872772,
|
|
"step": 3995,
|
|
"valid_targets_mean": 28832.9,
|
|
"valid_targets_min": 22547
|
|
},
|
|
{
|
|
"epoch": 4.268943436499466,
|
|
"grad_norm": 0.38380160113267353,
|
|
"learning_rate": 0.00010577143520212865,
|
|
"loss": 0.4238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4269213080406189,
|
|
"step": 4000,
|
|
"valid_targets_mean": 28669.2,
|
|
"valid_targets_min": 19214
|
|
},
|
|
{
|
|
"epoch": 4.27427961579509,
|
|
"grad_norm": 0.40747812293986796,
|
|
"learning_rate": 0.00010553895934152613,
|
|
"loss": 0.4208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42184334993362427,
|
|
"step": 4005,
|
|
"valid_targets_mean": 28920.7,
|
|
"valid_targets_min": 17251
|
|
},
|
|
{
|
|
"epoch": 4.279615795090715,
|
|
"grad_norm": 0.3752844436503374,
|
|
"learning_rate": 0.0001053064534495594,
|
|
"loss": 0.4204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4206317067146301,
|
|
"step": 4010,
|
|
"valid_targets_mean": 28755.0,
|
|
"valid_targets_min": 22352
|
|
},
|
|
{
|
|
"epoch": 4.284951974386339,
|
|
"grad_norm": 0.4518199767432239,
|
|
"learning_rate": 0.00010507391878683867,
|
|
"loss": 0.4215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42242133617401123,
|
|
"step": 4015,
|
|
"valid_targets_mean": 28927.4,
|
|
"valid_targets_min": 22818
|
|
},
|
|
{
|
|
"epoch": 4.2902881536819635,
|
|
"grad_norm": 0.3475919231149287,
|
|
"learning_rate": 0.00010484135661413032,
|
|
"loss": 0.4209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4208674430847168,
|
|
"step": 4020,
|
|
"valid_targets_mean": 28732.6,
|
|
"valid_targets_min": 20510
|
|
},
|
|
{
|
|
"epoch": 4.295624332977588,
|
|
"grad_norm": 0.4235774556631322,
|
|
"learning_rate": 0.00010460876819234972,
|
|
"loss": 0.4221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4238291084766388,
|
|
"step": 4025,
|
|
"valid_targets_mean": 28766.6,
|
|
"valid_targets_min": 18717
|
|
},
|
|
{
|
|
"epoch": 4.300960512273212,
|
|
"grad_norm": 0.42439288693085914,
|
|
"learning_rate": 0.00010437615478255464,
|
|
"loss": 0.4199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41955137252807617,
|
|
"step": 4030,
|
|
"valid_targets_mean": 28905.3,
|
|
"valid_targets_min": 20991
|
|
},
|
|
{
|
|
"epoch": 4.3062966915688365,
|
|
"grad_norm": 0.4058575188077753,
|
|
"learning_rate": 0.0001041435176459383,
|
|
"loss": 0.4206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4235634207725525,
|
|
"step": 4035,
|
|
"valid_targets_mean": 29025.3,
|
|
"valid_targets_min": 22493
|
|
},
|
|
{
|
|
"epoch": 4.311632870864461,
|
|
"grad_norm": 0.347511414517583,
|
|
"learning_rate": 0.00010391085804382258,
|
|
"loss": 0.4193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4162161946296692,
|
|
"step": 4040,
|
|
"valid_targets_mean": 28853.1,
|
|
"valid_targets_min": 22919
|
|
},
|
|
{
|
|
"epoch": 4.316969050160085,
|
|
"grad_norm": 0.3575533250439514,
|
|
"learning_rate": 0.00010367817723765118,
|
|
"loss": 0.4178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41767430305480957,
|
|
"step": 4045,
|
|
"valid_targets_mean": 28817.6,
|
|
"valid_targets_min": 21231
|
|
},
|
|
{
|
|
"epoch": 4.32230522945571,
|
|
"grad_norm": 0.3776651010242567,
|
|
"learning_rate": 0.00010344547648898268,
|
|
"loss": 0.4212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41819173097610474,
|
|
"step": 4050,
|
|
"valid_targets_mean": 28915.1,
|
|
"valid_targets_min": 19795
|
|
},
|
|
{
|
|
"epoch": 4.327641408751334,
|
|
"grad_norm": 0.4060559950771988,
|
|
"learning_rate": 0.00010321275705948395,
|
|
"loss": 0.4208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.422052800655365,
|
|
"step": 4055,
|
|
"valid_targets_mean": 28808.5,
|
|
"valid_targets_min": 19111
|
|
},
|
|
{
|
|
"epoch": 4.332977588046958,
|
|
"grad_norm": 0.3279553395225615,
|
|
"learning_rate": 0.00010298002021092295,
|
|
"loss": 0.4211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42231857776641846,
|
|
"step": 4060,
|
|
"valid_targets_mean": 28935.0,
|
|
"valid_targets_min": 21699
|
|
},
|
|
{
|
|
"epoch": 4.338313767342583,
|
|
"grad_norm": 0.4027106897077968,
|
|
"learning_rate": 0.00010274726720516224,
|
|
"loss": 0.4222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4235973358154297,
|
|
"step": 4065,
|
|
"valid_targets_mean": 28856.1,
|
|
"valid_targets_min": 20560
|
|
},
|
|
{
|
|
"epoch": 4.343649946638207,
|
|
"grad_norm": 0.3173931093590618,
|
|
"learning_rate": 0.00010251449930415189,
|
|
"loss": 0.4211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42240917682647705,
|
|
"step": 4070,
|
|
"valid_targets_mean": 28815.0,
|
|
"valid_targets_min": 20116
|
|
},
|
|
{
|
|
"epoch": 4.348986125933831,
|
|
"grad_norm": 0.3570827061362892,
|
|
"learning_rate": 0.00010228171776992277,
|
|
"loss": 0.4198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42050933837890625,
|
|
"step": 4075,
|
|
"valid_targets_mean": 28649.3,
|
|
"valid_targets_min": 22583
|
|
},
|
|
{
|
|
"epoch": 4.354322305229456,
|
|
"grad_norm": 0.36354519154513204,
|
|
"learning_rate": 0.00010204892386457963,
|
|
"loss": 0.419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4194909632205963,
|
|
"step": 4080,
|
|
"valid_targets_mean": 28704.4,
|
|
"valid_targets_min": 20318
|
|
},
|
|
{
|
|
"epoch": 4.35965848452508,
|
|
"grad_norm": 0.39015491619158854,
|
|
"learning_rate": 0.00010181611885029437,
|
|
"loss": 0.4206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4181373417377472,
|
|
"step": 4085,
|
|
"valid_targets_mean": 28879.2,
|
|
"valid_targets_min": 19574
|
|
},
|
|
{
|
|
"epoch": 4.3649946638207044,
|
|
"grad_norm": 0.3839553114737321,
|
|
"learning_rate": 0.00010158330398929907,
|
|
"loss": 0.4217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4242491126060486,
|
|
"step": 4090,
|
|
"valid_targets_mean": 28708.3,
|
|
"valid_targets_min": 17719
|
|
},
|
|
{
|
|
"epoch": 4.370330843116329,
|
|
"grad_norm": 0.44199414398240855,
|
|
"learning_rate": 0.00010135048054387913,
|
|
"loss": 0.4208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42398786544799805,
|
|
"step": 4095,
|
|
"valid_targets_mean": 28836.2,
|
|
"valid_targets_min": 20619
|
|
},
|
|
{
|
|
"epoch": 4.375667022411953,
|
|
"grad_norm": 0.3618767207365597,
|
|
"learning_rate": 0.00010111764977636662,
|
|
"loss": 0.4198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42410895228385925,
|
|
"step": 4100,
|
|
"valid_targets_mean": 28895.4,
|
|
"valid_targets_min": 19004
|
|
},
|
|
{
|
|
"epoch": 4.3810032017075775,
|
|
"grad_norm": 0.36698779978922264,
|
|
"learning_rate": 0.00010088481294913325,
|
|
"loss": 0.4194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4190062880516052,
|
|
"step": 4105,
|
|
"valid_targets_mean": 28961.4,
|
|
"valid_targets_min": 22115
|
|
},
|
|
{
|
|
"epoch": 4.386339381003202,
|
|
"grad_norm": 0.3596786969412505,
|
|
"learning_rate": 0.00010065197132458355,
|
|
"loss": 0.4206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4156681001186371,
|
|
"step": 4110,
|
|
"valid_targets_mean": 29058.2,
|
|
"valid_targets_min": 23876
|
|
},
|
|
{
|
|
"epoch": 4.391675560298826,
|
|
"grad_norm": 0.3523444002878811,
|
|
"learning_rate": 0.00010041912616514814,
|
|
"loss": 0.4213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42014560103416443,
|
|
"step": 4115,
|
|
"valid_targets_mean": 28964.7,
|
|
"valid_targets_min": 21459
|
|
},
|
|
{
|
|
"epoch": 4.397011739594451,
|
|
"grad_norm": 0.40111593679590957,
|
|
"learning_rate": 0.00010018627873327677,
|
|
"loss": 0.4214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42068126797676086,
|
|
"step": 4120,
|
|
"valid_targets_mean": 28766.8,
|
|
"valid_targets_min": 20172
|
|
},
|
|
{
|
|
"epoch": 4.402347918890075,
|
|
"grad_norm": 0.40337010512586796,
|
|
"learning_rate": 9.995343029143144e-05,
|
|
"loss": 0.4204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41805556416511536,
|
|
"step": 4125,
|
|
"valid_targets_mean": 28770.8,
|
|
"valid_targets_min": 20890
|
|
},
|
|
{
|
|
"epoch": 4.407684098185699,
|
|
"grad_norm": 0.3658947575955679,
|
|
"learning_rate": 9.972058210207977e-05,
|
|
"loss": 0.42,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4142877757549286,
|
|
"step": 4130,
|
|
"valid_targets_mean": 28966.2,
|
|
"valid_targets_min": 20850
|
|
},
|
|
{
|
|
"epoch": 4.413020277481324,
|
|
"grad_norm": 0.30876775620978403,
|
|
"learning_rate": 9.948773542768784e-05,
|
|
"loss": 0.4184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41614896059036255,
|
|
"step": 4135,
|
|
"valid_targets_mean": 28884.3,
|
|
"valid_targets_min": 22054
|
|
},
|
|
{
|
|
"epoch": 4.418356456776948,
|
|
"grad_norm": 0.4322421389015756,
|
|
"learning_rate": 9.925489153071368e-05,
|
|
"loss": 0.4221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41993236541748047,
|
|
"step": 4140,
|
|
"valid_targets_mean": 28718.8,
|
|
"valid_targets_min": 21421
|
|
},
|
|
{
|
|
"epoch": 4.423692636072572,
|
|
"grad_norm": 0.3549190852580818,
|
|
"learning_rate": 9.902205167360015e-05,
|
|
"loss": 0.4185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4208112061023712,
|
|
"step": 4145,
|
|
"valid_targets_mean": 28854.2,
|
|
"valid_targets_min": 19793
|
|
},
|
|
{
|
|
"epoch": 4.429028815368197,
|
|
"grad_norm": 0.40680341585192115,
|
|
"learning_rate": 9.878921711876828e-05,
|
|
"loss": 0.4193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41954243183135986,
|
|
"step": 4150,
|
|
"valid_targets_mean": 28818.1,
|
|
"valid_targets_min": 18085
|
|
},
|
|
{
|
|
"epoch": 4.434364994663821,
|
|
"grad_norm": 0.32415051353565083,
|
|
"learning_rate": 9.855638912861026e-05,
|
|
"loss": 0.4213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4222108721733093,
|
|
"step": 4155,
|
|
"valid_targets_mean": 28776.8,
|
|
"valid_targets_min": 22162
|
|
},
|
|
{
|
|
"epoch": 4.439701173959445,
|
|
"grad_norm": 0.40712073556446066,
|
|
"learning_rate": 9.832356896548279e-05,
|
|
"loss": 0.4207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41958823800086975,
|
|
"step": 4160,
|
|
"valid_targets_mean": 28875.7,
|
|
"valid_targets_min": 19902
|
|
},
|
|
{
|
|
"epoch": 4.44503735325507,
|
|
"grad_norm": 0.326745826240599,
|
|
"learning_rate": 9.809075789170009e-05,
|
|
"loss": 0.4221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4246634840965271,
|
|
"step": 4165,
|
|
"valid_targets_mean": 28828.1,
|
|
"valid_targets_min": 17376
|
|
},
|
|
{
|
|
"epoch": 4.450373532550694,
|
|
"grad_norm": 0.3374293665013668,
|
|
"learning_rate": 9.785795716952704e-05,
|
|
"loss": 0.4205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4239436388015747,
|
|
"step": 4170,
|
|
"valid_targets_mean": 28768.5,
|
|
"valid_targets_min": 21819
|
|
},
|
|
{
|
|
"epoch": 4.455709711846318,
|
|
"grad_norm": 0.37830214672521734,
|
|
"learning_rate": 9.76251680611725e-05,
|
|
"loss": 0.4204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4206947684288025,
|
|
"step": 4175,
|
|
"valid_targets_mean": 28782.2,
|
|
"valid_targets_min": 21215
|
|
},
|
|
{
|
|
"epoch": 4.461045891141943,
|
|
"grad_norm": 0.3745343990155674,
|
|
"learning_rate": 9.739239182878228e-05,
|
|
"loss": 0.4195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4192778468132019,
|
|
"step": 4180,
|
|
"valid_targets_mean": 28579.8,
|
|
"valid_targets_min": 18932
|
|
},
|
|
{
|
|
"epoch": 4.466382070437566,
|
|
"grad_norm": 0.32899941991572457,
|
|
"learning_rate": 9.715962973443244e-05,
|
|
"loss": 0.4217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.423557311296463,
|
|
"step": 4185,
|
|
"valid_targets_mean": 28701.1,
|
|
"valid_targets_min": 20241
|
|
},
|
|
{
|
|
"epoch": 4.471718249733191,
|
|
"grad_norm": 0.2994773603134494,
|
|
"learning_rate": 9.692688304012239e-05,
|
|
"loss": 0.4199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4179522693157196,
|
|
"step": 4190,
|
|
"valid_targets_mean": 28791.0,
|
|
"valid_targets_min": 17497
|
|
},
|
|
{
|
|
"epoch": 4.477054429028815,
|
|
"grad_norm": 0.39070167349145646,
|
|
"learning_rate": 9.669415300776791e-05,
|
|
"loss": 0.4193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.421237975358963,
|
|
"step": 4195,
|
|
"valid_targets_mean": 28806.9,
|
|
"valid_targets_min": 17517
|
|
},
|
|
{
|
|
"epoch": 4.482390608324439,
|
|
"grad_norm": 0.3398067579522288,
|
|
"learning_rate": 9.646144089919462e-05,
|
|
"loss": 0.4211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4213641285896301,
|
|
"step": 4200,
|
|
"valid_targets_mean": 28845.2,
|
|
"valid_targets_min": 23682
|
|
},
|
|
{
|
|
"epoch": 4.487726787620064,
|
|
"grad_norm": 0.36298214897813275,
|
|
"learning_rate": 9.622874797613084e-05,
|
|
"loss": 0.4186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4160724878311157,
|
|
"step": 4205,
|
|
"valid_targets_mean": 28931.6,
|
|
"valid_targets_min": 20098
|
|
},
|
|
{
|
|
"epoch": 4.493062966915688,
|
|
"grad_norm": 0.3358526558198807,
|
|
"learning_rate": 9.599607550020097e-05,
|
|
"loss": 0.4201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41969814896583557,
|
|
"step": 4210,
|
|
"valid_targets_mean": 28897.4,
|
|
"valid_targets_min": 23228
|
|
},
|
|
{
|
|
"epoch": 4.498399146211312,
|
|
"grad_norm": 0.292914484584559,
|
|
"learning_rate": 9.576342473291842e-05,
|
|
"loss": 0.4208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4190637469291687,
|
|
"step": 4215,
|
|
"valid_targets_mean": 28789.5,
|
|
"valid_targets_min": 22550
|
|
},
|
|
{
|
|
"epoch": 4.503735325506937,
|
|
"grad_norm": 0.37867376396765706,
|
|
"learning_rate": 9.5530796935679e-05,
|
|
"loss": 0.4213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4196445345878601,
|
|
"step": 4220,
|
|
"valid_targets_mean": 28898.7,
|
|
"valid_targets_min": 21128
|
|
},
|
|
{
|
|
"epoch": 4.509071504802561,
|
|
"grad_norm": 0.34320896429190056,
|
|
"learning_rate": 9.529819336975399e-05,
|
|
"loss": 0.4192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4201467037200928,
|
|
"step": 4225,
|
|
"valid_targets_mean": 28947.5,
|
|
"valid_targets_min": 21968
|
|
},
|
|
{
|
|
"epoch": 4.5144076840981855,
|
|
"grad_norm": 0.3345778358392203,
|
|
"learning_rate": 9.506561529628315e-05,
|
|
"loss": 0.4196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4164847135543823,
|
|
"step": 4230,
|
|
"valid_targets_mean": 28720.5,
|
|
"valid_targets_min": 19838
|
|
},
|
|
{
|
|
"epoch": 4.51974386339381,
|
|
"grad_norm": 0.42696060130478747,
|
|
"learning_rate": 9.48330639762682e-05,
|
|
"loss": 0.4201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4239192605018616,
|
|
"step": 4235,
|
|
"valid_targets_mean": 28780.6,
|
|
"valid_targets_min": 21973
|
|
},
|
|
{
|
|
"epoch": 4.525080042689434,
|
|
"grad_norm": 0.3493891285321995,
|
|
"learning_rate": 9.46005406705657e-05,
|
|
"loss": 0.4178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41531574726104736,
|
|
"step": 4240,
|
|
"valid_targets_mean": 28854.0,
|
|
"valid_targets_min": 20820
|
|
},
|
|
{
|
|
"epoch": 4.5304162219850586,
|
|
"grad_norm": 0.4017298856118809,
|
|
"learning_rate": 9.436804663988041e-05,
|
|
"loss": 0.4208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4187561273574829,
|
|
"step": 4245,
|
|
"valid_targets_mean": 28727.1,
|
|
"valid_targets_min": 20099
|
|
},
|
|
{
|
|
"epoch": 4.535752401280683,
|
|
"grad_norm": 0.34885821775638554,
|
|
"learning_rate": 9.413558314475825e-05,
|
|
"loss": 0.4213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4193626046180725,
|
|
"step": 4250,
|
|
"valid_targets_mean": 28871.8,
|
|
"valid_targets_min": 19352
|
|
},
|
|
{
|
|
"epoch": 4.541088580576307,
|
|
"grad_norm": 0.3550438111233903,
|
|
"learning_rate": 9.390315144557961e-05,
|
|
"loss": 0.4207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41961002349853516,
|
|
"step": 4255,
|
|
"valid_targets_mean": 28847.8,
|
|
"valid_targets_min": 21715
|
|
},
|
|
{
|
|
"epoch": 4.546424759871932,
|
|
"grad_norm": 0.35789280524944217,
|
|
"learning_rate": 9.367075280255262e-05,
|
|
"loss": 0.4187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41878896951675415,
|
|
"step": 4260,
|
|
"valid_targets_mean": 29012.0,
|
|
"valid_targets_min": 20482
|
|
},
|
|
{
|
|
"epoch": 4.551760939167556,
|
|
"grad_norm": 0.3234103601058616,
|
|
"learning_rate": 9.343838847570594e-05,
|
|
"loss": 0.4202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42339909076690674,
|
|
"step": 4265,
|
|
"valid_targets_mean": 28761.8,
|
|
"valid_targets_min": 21638
|
|
},
|
|
{
|
|
"epoch": 4.55709711846318,
|
|
"grad_norm": 0.392164466338175,
|
|
"learning_rate": 9.320605972488247e-05,
|
|
"loss": 0.4192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41794681549072266,
|
|
"step": 4270,
|
|
"valid_targets_mean": 28815.6,
|
|
"valid_targets_min": 18518
|
|
},
|
|
{
|
|
"epoch": 4.562433297758805,
|
|
"grad_norm": 0.3262995318235304,
|
|
"learning_rate": 9.297376780973192e-05,
|
|
"loss": 0.4186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41663309931755066,
|
|
"step": 4275,
|
|
"valid_targets_mean": 28847.7,
|
|
"valid_targets_min": 19122
|
|
},
|
|
{
|
|
"epoch": 4.567769477054429,
|
|
"grad_norm": 0.316402610712325,
|
|
"learning_rate": 9.274151398970456e-05,
|
|
"loss": 0.4191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4165911078453064,
|
|
"step": 4280,
|
|
"valid_targets_mean": 28901.4,
|
|
"valid_targets_min": 22373
|
|
},
|
|
{
|
|
"epoch": 4.573105656350053,
|
|
"grad_norm": 0.36717371358448186,
|
|
"learning_rate": 9.250929952404392e-05,
|
|
"loss": 0.4166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41654711961746216,
|
|
"step": 4285,
|
|
"valid_targets_mean": 28828.3,
|
|
"valid_targets_min": 18039
|
|
},
|
|
{
|
|
"epoch": 4.578441835645678,
|
|
"grad_norm": 0.3160343590979992,
|
|
"learning_rate": 9.227712567178018e-05,
|
|
"loss": 0.4204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42192721366882324,
|
|
"step": 4290,
|
|
"valid_targets_mean": 28948.7,
|
|
"valid_targets_min": 22103
|
|
},
|
|
{
|
|
"epoch": 4.583778014941302,
|
|
"grad_norm": 0.3516830197853869,
|
|
"learning_rate": 9.204499369172343e-05,
|
|
"loss": 0.4198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.422343373298645,
|
|
"step": 4295,
|
|
"valid_targets_mean": 28722.5,
|
|
"valid_targets_min": 20948
|
|
},
|
|
{
|
|
"epoch": 4.5891141942369265,
|
|
"grad_norm": 0.37186306459070556,
|
|
"learning_rate": 9.18129048424566e-05,
|
|
"loss": 0.4197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42082688212394714,
|
|
"step": 4300,
|
|
"valid_targets_mean": 29034.9,
|
|
"valid_targets_min": 18977
|
|
},
|
|
{
|
|
"epoch": 4.594450373532551,
|
|
"grad_norm": 0.3229935606530557,
|
|
"learning_rate": 9.158086038232894e-05,
|
|
"loss": 0.4194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4219149649143219,
|
|
"step": 4305,
|
|
"valid_targets_mean": 28729.0,
|
|
"valid_targets_min": 19451
|
|
},
|
|
{
|
|
"epoch": 4.599786552828175,
|
|
"grad_norm": 0.3579114181012708,
|
|
"learning_rate": 9.134886156944883e-05,
|
|
"loss": 0.4205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.424641489982605,
|
|
"step": 4310,
|
|
"valid_targets_mean": 28805.3,
|
|
"valid_targets_min": 17376
|
|
},
|
|
{
|
|
"epoch": 4.6051227321237995,
|
|
"grad_norm": 0.4211346528799389,
|
|
"learning_rate": 9.111690966167727e-05,
|
|
"loss": 0.4205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42049410939216614,
|
|
"step": 4315,
|
|
"valid_targets_mean": 28814.6,
|
|
"valid_targets_min": 17756
|
|
},
|
|
{
|
|
"epoch": 4.610458911419424,
|
|
"grad_norm": 0.3418778948184453,
|
|
"learning_rate": 9.088500591662099e-05,
|
|
"loss": 0.4191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41268154978752136,
|
|
"step": 4320,
|
|
"valid_targets_mean": 28675.5,
|
|
"valid_targets_min": 16701
|
|
},
|
|
{
|
|
"epoch": 4.615795090715048,
|
|
"grad_norm": 0.3409962577347365,
|
|
"learning_rate": 9.065315159162546e-05,
|
|
"loss": 0.4203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4228631556034088,
|
|
"step": 4325,
|
|
"valid_targets_mean": 28863.2,
|
|
"valid_targets_min": 19723
|
|
},
|
|
{
|
|
"epoch": 4.621131270010673,
|
|
"grad_norm": 0.2805196210087388,
|
|
"learning_rate": 9.042134794376833e-05,
|
|
"loss": 0.4196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41876015067100525,
|
|
"step": 4330,
|
|
"valid_targets_mean": 28818.4,
|
|
"valid_targets_min": 21421
|
|
},
|
|
{
|
|
"epoch": 4.626467449306297,
|
|
"grad_norm": 0.36554232885208404,
|
|
"learning_rate": 9.018959622985238e-05,
|
|
"loss": 0.4166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4189041554927826,
|
|
"step": 4335,
|
|
"valid_targets_mean": 28728.7,
|
|
"valid_targets_min": 20241
|
|
},
|
|
{
|
|
"epoch": 4.631803628601921,
|
|
"grad_norm": 0.325278718856548,
|
|
"learning_rate": 8.99578977063989e-05,
|
|
"loss": 0.4178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.413724422454834,
|
|
"step": 4340,
|
|
"valid_targets_mean": 28706.4,
|
|
"valid_targets_min": 18963
|
|
},
|
|
{
|
|
"epoch": 4.637139807897546,
|
|
"grad_norm": 0.3431558034381059,
|
|
"learning_rate": 8.972625362964078e-05,
|
|
"loss": 0.4186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4200518727302551,
|
|
"step": 4345,
|
|
"valid_targets_mean": 28807.0,
|
|
"valid_targets_min": 18152
|
|
},
|
|
{
|
|
"epoch": 4.64247598719317,
|
|
"grad_norm": 0.3936183086017574,
|
|
"learning_rate": 8.949466525551561e-05,
|
|
"loss": 0.4196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4186498522758484,
|
|
"step": 4350,
|
|
"valid_targets_mean": 28906.8,
|
|
"valid_targets_min": 22116
|
|
},
|
|
{
|
|
"epoch": 4.647812166488794,
|
|
"grad_norm": 0.2589150848327681,
|
|
"learning_rate": 8.92631338396591e-05,
|
|
"loss": 0.4194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41684263944625854,
|
|
"step": 4355,
|
|
"valid_targets_mean": 28874.1,
|
|
"valid_targets_min": 21201
|
|
},
|
|
{
|
|
"epoch": 4.653148345784419,
|
|
"grad_norm": 0.42417381425709433,
|
|
"learning_rate": 8.903166063739801e-05,
|
|
"loss": 0.4196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4187427759170532,
|
|
"step": 4360,
|
|
"valid_targets_mean": 28964.5,
|
|
"valid_targets_min": 21909
|
|
},
|
|
{
|
|
"epoch": 4.658484525080043,
|
|
"grad_norm": 0.418946503334357,
|
|
"learning_rate": 8.880024690374367e-05,
|
|
"loss": 0.4206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4185762405395508,
|
|
"step": 4365,
|
|
"valid_targets_mean": 28706.8,
|
|
"valid_targets_min": 20141
|
|
},
|
|
{
|
|
"epoch": 4.6638207043756665,
|
|
"grad_norm": 0.3436223121999799,
|
|
"learning_rate": 8.856889389338474e-05,
|
|
"loss": 0.4173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41747280955314636,
|
|
"step": 4370,
|
|
"valid_targets_mean": 28651.3,
|
|
"valid_targets_min": 19727
|
|
},
|
|
{
|
|
"epoch": 4.669156883671292,
|
|
"grad_norm": 0.3089754644534319,
|
|
"learning_rate": 8.833760286068079e-05,
|
|
"loss": 0.4184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4154074192047119,
|
|
"step": 4375,
|
|
"valid_targets_mean": 28847.3,
|
|
"valid_targets_min": 21510
|
|
},
|
|
{
|
|
"epoch": 4.674493062966915,
|
|
"grad_norm": 0.35073468955471265,
|
|
"learning_rate": 8.81063750596554e-05,
|
|
"loss": 0.4188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4182649254798889,
|
|
"step": 4380,
|
|
"valid_targets_mean": 28780.5,
|
|
"valid_targets_min": 22501
|
|
},
|
|
{
|
|
"epoch": 4.6798292422625405,
|
|
"grad_norm": 0.3380932879099673,
|
|
"learning_rate": 8.787521174398914e-05,
|
|
"loss": 0.4175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4169265329837799,
|
|
"step": 4385,
|
|
"valid_targets_mean": 28870.8,
|
|
"valid_targets_min": 22732
|
|
},
|
|
{
|
|
"epoch": 4.685165421558164,
|
|
"grad_norm": 0.33067693590054464,
|
|
"learning_rate": 8.764411416701317e-05,
|
|
"loss": 0.4186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4172070622444153,
|
|
"step": 4390,
|
|
"valid_targets_mean": 28932.9,
|
|
"valid_targets_min": 22018
|
|
},
|
|
{
|
|
"epoch": 4.690501600853788,
|
|
"grad_norm": 0.27120664884307055,
|
|
"learning_rate": 8.741308358170202e-05,
|
|
"loss": 0.4201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42023521661758423,
|
|
"step": 4395,
|
|
"valid_targets_mean": 29050.9,
|
|
"valid_targets_min": 22707
|
|
},
|
|
{
|
|
"epoch": 4.695837780149413,
|
|
"grad_norm": 0.3965087073853456,
|
|
"learning_rate": 8.71821212406672e-05,
|
|
"loss": 0.4184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4160880446434021,
|
|
"step": 4400,
|
|
"valid_targets_mean": 28808.7,
|
|
"valid_targets_min": 21127
|
|
},
|
|
{
|
|
"epoch": 4.701173959445037,
|
|
"grad_norm": 0.29841783707385844,
|
|
"learning_rate": 8.695122839615004e-05,
|
|
"loss": 0.4169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41510114073753357,
|
|
"step": 4405,
|
|
"valid_targets_mean": 28624.5,
|
|
"valid_targets_min": 20551
|
|
},
|
|
{
|
|
"epoch": 4.706510138740661,
|
|
"grad_norm": 0.33245364871492056,
|
|
"learning_rate": 8.672040630001511e-05,
|
|
"loss": 0.4178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41986995935440063,
|
|
"step": 4410,
|
|
"valid_targets_mean": 28915.2,
|
|
"valid_targets_min": 22626
|
|
},
|
|
{
|
|
"epoch": 4.711846318036286,
|
|
"grad_norm": 0.306725569562423,
|
|
"learning_rate": 8.648965620374352e-05,
|
|
"loss": 0.4188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4156710207462311,
|
|
"step": 4415,
|
|
"valid_targets_mean": 28743.1,
|
|
"valid_targets_min": 13210
|
|
},
|
|
{
|
|
"epoch": 4.71718249733191,
|
|
"grad_norm": 0.37738100167382255,
|
|
"learning_rate": 8.625897935842578e-05,
|
|
"loss": 0.4176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4194144010543823,
|
|
"step": 4420,
|
|
"valid_targets_mean": 28895.6,
|
|
"valid_targets_min": 22626
|
|
},
|
|
{
|
|
"epoch": 4.722518676627534,
|
|
"grad_norm": 0.34450445819147957,
|
|
"learning_rate": 8.602837701475551e-05,
|
|
"loss": 0.4171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.412597119808197,
|
|
"step": 4425,
|
|
"valid_targets_mean": 28783.0,
|
|
"valid_targets_min": 22626
|
|
},
|
|
{
|
|
"epoch": 4.727854855923159,
|
|
"grad_norm": 0.3038540518074493,
|
|
"learning_rate": 8.579785042302218e-05,
|
|
"loss": 0.4168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4147343635559082,
|
|
"step": 4430,
|
|
"valid_targets_mean": 28978.0,
|
|
"valid_targets_min": 22913
|
|
},
|
|
{
|
|
"epoch": 4.733191035218783,
|
|
"grad_norm": 0.44131096389065855,
|
|
"learning_rate": 8.556740083310463e-05,
|
|
"loss": 0.4191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4168371558189392,
|
|
"step": 4435,
|
|
"valid_targets_mean": 28870.3,
|
|
"valid_targets_min": 21449
|
|
},
|
|
{
|
|
"epoch": 4.7385272145144075,
|
|
"grad_norm": 0.39415552699192785,
|
|
"learning_rate": 8.533702949446424e-05,
|
|
"loss": 0.4176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41771623492240906,
|
|
"step": 4440,
|
|
"valid_targets_mean": 28764.5,
|
|
"valid_targets_min": 21786
|
|
},
|
|
{
|
|
"epoch": 4.743863393810032,
|
|
"grad_norm": 0.3287126670187571,
|
|
"learning_rate": 8.510673765613803e-05,
|
|
"loss": 0.4206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4177106022834778,
|
|
"step": 4445,
|
|
"valid_targets_mean": 28957.6,
|
|
"valid_targets_min": 20693
|
|
},
|
|
{
|
|
"epoch": 4.749199573105656,
|
|
"grad_norm": 0.28632108037662457,
|
|
"learning_rate": 8.487652656673209e-05,
|
|
"loss": 0.4158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4170299172401428,
|
|
"step": 4450,
|
|
"valid_targets_mean": 28610.0,
|
|
"valid_targets_min": 21408
|
|
},
|
|
{
|
|
"epoch": 4.754535752401281,
|
|
"grad_norm": 0.3122943893226229,
|
|
"learning_rate": 8.464639747441459e-05,
|
|
"loss": 0.4186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41472533345222473,
|
|
"step": 4455,
|
|
"valid_targets_mean": 28899.9,
|
|
"valid_targets_min": 20112
|
|
},
|
|
{
|
|
"epoch": 4.759871931696905,
|
|
"grad_norm": 0.2767625756846586,
|
|
"learning_rate": 8.441635162690927e-05,
|
|
"loss": 0.4178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4218922257423401,
|
|
"step": 4460,
|
|
"valid_targets_mean": 29026.5,
|
|
"valid_targets_min": 23953
|
|
},
|
|
{
|
|
"epoch": 4.765208110992529,
|
|
"grad_norm": 0.3016890644467839,
|
|
"learning_rate": 8.418639027148838e-05,
|
|
"loss": 0.418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41578352451324463,
|
|
"step": 4465,
|
|
"valid_targets_mean": 28831.1,
|
|
"valid_targets_min": 20621
|
|
},
|
|
{
|
|
"epoch": 4.770544290288154,
|
|
"grad_norm": 0.32405861806307124,
|
|
"learning_rate": 8.395651465496613e-05,
|
|
"loss": 0.4169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41797369718551636,
|
|
"step": 4470,
|
|
"valid_targets_mean": 28837.1,
|
|
"valid_targets_min": 19761
|
|
},
|
|
{
|
|
"epoch": 4.775880469583778,
|
|
"grad_norm": 0.30666236660222157,
|
|
"learning_rate": 8.372672602369191e-05,
|
|
"loss": 0.4153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41425567865371704,
|
|
"step": 4475,
|
|
"valid_targets_mean": 28974.5,
|
|
"valid_targets_min": 21885
|
|
},
|
|
{
|
|
"epoch": 4.781216648879402,
|
|
"grad_norm": 0.2961485972394307,
|
|
"learning_rate": 8.34970256235434e-05,
|
|
"loss": 0.4179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4199238717556,
|
|
"step": 4480,
|
|
"valid_targets_mean": 28834.1,
|
|
"valid_targets_min": 21855
|
|
},
|
|
{
|
|
"epoch": 4.786552828175027,
|
|
"grad_norm": 0.29191812028156455,
|
|
"learning_rate": 8.326741469992001e-05,
|
|
"loss": 0.4165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4175301194190979,
|
|
"step": 4485,
|
|
"valid_targets_mean": 28841.4,
|
|
"valid_targets_min": 21643
|
|
},
|
|
{
|
|
"epoch": 4.791889007470651,
|
|
"grad_norm": 0.3043402658104164,
|
|
"learning_rate": 8.303789449773593e-05,
|
|
"loss": 0.4175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.418673038482666,
|
|
"step": 4490,
|
|
"valid_targets_mean": 28827.9,
|
|
"valid_targets_min": 21275
|
|
},
|
|
{
|
|
"epoch": 4.797225186766275,
|
|
"grad_norm": 0.29684399099924325,
|
|
"learning_rate": 8.28084662614135e-05,
|
|
"loss": 0.4182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4179407060146332,
|
|
"step": 4495,
|
|
"valid_targets_mean": 28900.9,
|
|
"valid_targets_min": 22257
|
|
},
|
|
{
|
|
"epoch": 4.8025613660619,
|
|
"grad_norm": 0.3068560334729122,
|
|
"learning_rate": 8.257913123487646e-05,
|
|
"loss": 0.4169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41665366291999817,
|
|
"step": 4500,
|
|
"valid_targets_mean": 28967.9,
|
|
"valid_targets_min": 22325
|
|
},
|
|
{
|
|
"epoch": 4.807897545357524,
|
|
"grad_norm": 0.383704390382649,
|
|
"learning_rate": 8.234989066154314e-05,
|
|
"loss": 0.4184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41750168800354004,
|
|
"step": 4505,
|
|
"valid_targets_mean": 28859.4,
|
|
"valid_targets_min": 23095
|
|
},
|
|
{
|
|
"epoch": 4.8132337246531485,
|
|
"grad_norm": 0.31464696881108956,
|
|
"learning_rate": 8.212074578431983e-05,
|
|
"loss": 0.4152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41778695583343506,
|
|
"step": 4510,
|
|
"valid_targets_mean": 28916.0,
|
|
"valid_targets_min": 18485
|
|
},
|
|
{
|
|
"epoch": 4.818569903948773,
|
|
"grad_norm": 0.3401817851623499,
|
|
"learning_rate": 8.189169784559388e-05,
|
|
"loss": 0.4176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4168986678123474,
|
|
"step": 4515,
|
|
"valid_targets_mean": 28711.9,
|
|
"valid_targets_min": 21349
|
|
},
|
|
{
|
|
"epoch": 4.823906083244397,
|
|
"grad_norm": 0.31446079438830254,
|
|
"learning_rate": 8.166274808722717e-05,
|
|
"loss": 0.416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41559553146362305,
|
|
"step": 4520,
|
|
"valid_targets_mean": 28771.6,
|
|
"valid_targets_min": 20055
|
|
},
|
|
{
|
|
"epoch": 4.8292422625400215,
|
|
"grad_norm": 0.37148698409607017,
|
|
"learning_rate": 8.143389775054913e-05,
|
|
"loss": 0.4144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4131711721420288,
|
|
"step": 4525,
|
|
"valid_targets_mean": 28967.4,
|
|
"valid_targets_min": 23007
|
|
},
|
|
{
|
|
"epoch": 4.834578441835646,
|
|
"grad_norm": 0.40259768703683657,
|
|
"learning_rate": 8.120514807635019e-05,
|
|
"loss": 0.4166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41874104738235474,
|
|
"step": 4530,
|
|
"valid_targets_mean": 28941.8,
|
|
"valid_targets_min": 21343
|
|
},
|
|
{
|
|
"epoch": 4.83991462113127,
|
|
"grad_norm": 0.3111587866730002,
|
|
"learning_rate": 8.097650030487509e-05,
|
|
"loss": 0.4174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41815048456192017,
|
|
"step": 4535,
|
|
"valid_targets_mean": 28776.4,
|
|
"valid_targets_min": 17463
|
|
},
|
|
{
|
|
"epoch": 4.845250800426895,
|
|
"grad_norm": 0.4037662210760224,
|
|
"learning_rate": 8.07479556758159e-05,
|
|
"loss": 0.4187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4164711833000183,
|
|
"step": 4540,
|
|
"valid_targets_mean": 28861.0,
|
|
"valid_targets_min": 21525
|
|
},
|
|
{
|
|
"epoch": 4.850586979722519,
|
|
"grad_norm": 0.3653771293856302,
|
|
"learning_rate": 8.051951542830565e-05,
|
|
"loss": 0.4174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4157501459121704,
|
|
"step": 4545,
|
|
"valid_targets_mean": 28876.5,
|
|
"valid_targets_min": 22176
|
|
},
|
|
{
|
|
"epoch": 4.855923159018143,
|
|
"grad_norm": 0.24448778166438007,
|
|
"learning_rate": 8.029118080091133e-05,
|
|
"loss": 0.417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4192211627960205,
|
|
"step": 4550,
|
|
"valid_targets_mean": 28988.9,
|
|
"valid_targets_min": 19752
|
|
},
|
|
{
|
|
"epoch": 4.861259338313768,
|
|
"grad_norm": 0.3065192095331345,
|
|
"learning_rate": 8.006295303162723e-05,
|
|
"loss": 0.4168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4171658456325531,
|
|
"step": 4555,
|
|
"valid_targets_mean": 28943.8,
|
|
"valid_targets_min": 18260
|
|
},
|
|
{
|
|
"epoch": 4.866595517609392,
|
|
"grad_norm": 0.34152052598085214,
|
|
"learning_rate": 7.983483335786838e-05,
|
|
"loss": 0.4174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41691333055496216,
|
|
"step": 4560,
|
|
"valid_targets_mean": 28818.9,
|
|
"valid_targets_min": 19534
|
|
},
|
|
{
|
|
"epoch": 4.871931696905016,
|
|
"grad_norm": 0.34423404010059494,
|
|
"learning_rate": 7.960682301646367e-05,
|
|
"loss": 0.4174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4178677499294281,
|
|
"step": 4565,
|
|
"valid_targets_mean": 28691.4,
|
|
"valid_targets_min": 21732
|
|
},
|
|
{
|
|
"epoch": 4.877267876200641,
|
|
"grad_norm": 0.3502990969830706,
|
|
"learning_rate": 7.937892324364927e-05,
|
|
"loss": 0.4167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41543686389923096,
|
|
"step": 4570,
|
|
"valid_targets_mean": 28915.1,
|
|
"valid_targets_min": 21491
|
|
},
|
|
{
|
|
"epoch": 4.882604055496264,
|
|
"grad_norm": 0.3067683857912805,
|
|
"learning_rate": 7.915113527506172e-05,
|
|
"loss": 0.4191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41942209005355835,
|
|
"step": 4575,
|
|
"valid_targets_mean": 29039.0,
|
|
"valid_targets_min": 23137
|
|
},
|
|
{
|
|
"epoch": 4.887940234791889,
|
|
"grad_norm": 0.2858703834942536,
|
|
"learning_rate": 7.892346034573155e-05,
|
|
"loss": 0.4146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41157788038253784,
|
|
"step": 4580,
|
|
"valid_targets_mean": 28865.8,
|
|
"valid_targets_min": 21471
|
|
},
|
|
{
|
|
"epoch": 4.893276414087513,
|
|
"grad_norm": 0.3073279216029923,
|
|
"learning_rate": 7.869589969007631e-05,
|
|
"loss": 0.419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.418150931596756,
|
|
"step": 4585,
|
|
"valid_targets_mean": 28745.0,
|
|
"valid_targets_min": 22147
|
|
},
|
|
{
|
|
"epoch": 4.898612593383138,
|
|
"grad_norm": 0.2578006093415181,
|
|
"learning_rate": 7.846845454189395e-05,
|
|
"loss": 0.4169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41997313499450684,
|
|
"step": 4590,
|
|
"valid_targets_mean": 28904.9,
|
|
"valid_targets_min": 23113
|
|
},
|
|
{
|
|
"epoch": 4.903948772678762,
|
|
"grad_norm": 0.31226067833635385,
|
|
"learning_rate": 7.82411261343563e-05,
|
|
"loss": 0.4167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41901105642318726,
|
|
"step": 4595,
|
|
"valid_targets_mean": 29030.1,
|
|
"valid_targets_min": 22724
|
|
},
|
|
{
|
|
"epoch": 4.909284951974386,
|
|
"grad_norm": 0.30264006623904355,
|
|
"learning_rate": 7.801391570000203e-05,
|
|
"loss": 0.4156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4186111092567444,
|
|
"step": 4600,
|
|
"valid_targets_mean": 28866.0,
|
|
"valid_targets_min": 19994
|
|
},
|
|
{
|
|
"epoch": 4.91462113127001,
|
|
"grad_norm": 0.30406421360587316,
|
|
"learning_rate": 7.778682447073037e-05,
|
|
"loss": 0.4165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4153941869735718,
|
|
"step": 4605,
|
|
"valid_targets_mean": 28828.0,
|
|
"valid_targets_min": 21703
|
|
},
|
|
{
|
|
"epoch": 4.919957310565635,
|
|
"grad_norm": 0.2833979268310583,
|
|
"learning_rate": 7.755985367779417e-05,
|
|
"loss": 0.416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41638970375061035,
|
|
"step": 4610,
|
|
"valid_targets_mean": 28857.7,
|
|
"valid_targets_min": 18061
|
|
},
|
|
{
|
|
"epoch": 4.925293489861259,
|
|
"grad_norm": 0.3102148343129894,
|
|
"learning_rate": 7.73330045517932e-05,
|
|
"loss": 0.418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4176219403743744,
|
|
"step": 4615,
|
|
"valid_targets_mean": 28663.0,
|
|
"valid_targets_min": 20634
|
|
},
|
|
{
|
|
"epoch": 4.930629669156883,
|
|
"grad_norm": 0.2785617541092793,
|
|
"learning_rate": 7.710627832266772e-05,
|
|
"loss": 0.4181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4157201945781708,
|
|
"step": 4620,
|
|
"valid_targets_mean": 28889.3,
|
|
"valid_targets_min": 17762
|
|
},
|
|
{
|
|
"epoch": 4.935965848452508,
|
|
"grad_norm": 0.3049573179938812,
|
|
"learning_rate": 7.687967621969156e-05,
|
|
"loss": 0.4154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4141373336315155,
|
|
"step": 4625,
|
|
"valid_targets_mean": 28693.4,
|
|
"valid_targets_min": 19540
|
|
},
|
|
{
|
|
"epoch": 4.941302027748132,
|
|
"grad_norm": 0.2938453993328095,
|
|
"learning_rate": 7.665319947146567e-05,
|
|
"loss": 0.419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4159335494041443,
|
|
"step": 4630,
|
|
"valid_targets_mean": 28816.9,
|
|
"valid_targets_min": 21054
|
|
},
|
|
{
|
|
"epoch": 4.9466382070437565,
|
|
"grad_norm": 0.33632792903028347,
|
|
"learning_rate": 7.642684930591118e-05,
|
|
"loss": 0.4147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4127083122730255,
|
|
"step": 4635,
|
|
"valid_targets_mean": 28978.0,
|
|
"valid_targets_min": 22893
|
|
},
|
|
{
|
|
"epoch": 4.951974386339381,
|
|
"grad_norm": 0.3439403913153363,
|
|
"learning_rate": 7.620062695026305e-05,
|
|
"loss": 0.4144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4118298590183258,
|
|
"step": 4640,
|
|
"valid_targets_mean": 28901.1,
|
|
"valid_targets_min": 22439
|
|
},
|
|
{
|
|
"epoch": 4.957310565635005,
|
|
"grad_norm": 0.29311156891677753,
|
|
"learning_rate": 7.597453363106323e-05,
|
|
"loss": 0.4177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4171184301376343,
|
|
"step": 4645,
|
|
"valid_targets_mean": 28937.9,
|
|
"valid_targets_min": 19838
|
|
},
|
|
{
|
|
"epoch": 4.9626467449306295,
|
|
"grad_norm": 0.2908629315122143,
|
|
"learning_rate": 7.574857057415401e-05,
|
|
"loss": 0.4144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4137275218963623,
|
|
"step": 4650,
|
|
"valid_targets_mean": 28969.2,
|
|
"valid_targets_min": 21641
|
|
},
|
|
{
|
|
"epoch": 4.967982924226254,
|
|
"grad_norm": 0.30981310592678857,
|
|
"learning_rate": 7.552273900467149e-05,
|
|
"loss": 0.4154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4096093475818634,
|
|
"step": 4655,
|
|
"valid_targets_mean": 28731.8,
|
|
"valid_targets_min": 22058
|
|
},
|
|
{
|
|
"epoch": 4.973319103521878,
|
|
"grad_norm": 0.30163436028418267,
|
|
"learning_rate": 7.529704014703878e-05,
|
|
"loss": 0.4175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41557684540748596,
|
|
"step": 4660,
|
|
"valid_targets_mean": 28835.5,
|
|
"valid_targets_min": 20865
|
|
},
|
|
{
|
|
"epoch": 4.978655282817503,
|
|
"grad_norm": 0.32491627154320946,
|
|
"learning_rate": 7.50714752249596e-05,
|
|
"loss": 0.4155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41583991050720215,
|
|
"step": 4665,
|
|
"valid_targets_mean": 28765.9,
|
|
"valid_targets_min": 21199
|
|
},
|
|
{
|
|
"epoch": 4.983991462113127,
|
|
"grad_norm": 0.2716746089249112,
|
|
"learning_rate": 7.484604546141132e-05,
|
|
"loss": 0.4163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4182189404964447,
|
|
"step": 4670,
|
|
"valid_targets_mean": 28768.7,
|
|
"valid_targets_min": 18559
|
|
},
|
|
{
|
|
"epoch": 4.989327641408751,
|
|
"grad_norm": 0.317168435680314,
|
|
"learning_rate": 7.462075207863856e-05,
|
|
"loss": 0.4154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41550585627555847,
|
|
"step": 4675,
|
|
"valid_targets_mean": 28763.5,
|
|
"valid_targets_min": 23064
|
|
},
|
|
{
|
|
"epoch": 4.994663820704376,
|
|
"grad_norm": 0.2869849007273826,
|
|
"learning_rate": 7.439559629814657e-05,
|
|
"loss": 0.4143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4126055836677551,
|
|
"step": 4680,
|
|
"valid_targets_mean": 28877.9,
|
|
"valid_targets_min": 22243
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.3184488694647241,
|
|
"learning_rate": 7.417057934069445e-05,
|
|
"loss": 0.4132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41238728165626526,
|
|
"step": 4685,
|
|
"valid_targets_mean": 28912.7,
|
|
"valid_targets_min": 17388
|
|
},
|
|
{
|
|
"epoch": 5.005336179295624,
|
|
"grad_norm": 0.2925300189389251,
|
|
"learning_rate": 7.394570242628875e-05,
|
|
"loss": 0.4111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41013145446777344,
|
|
"step": 4690,
|
|
"valid_targets_mean": 28927.0,
|
|
"valid_targets_min": 20840
|
|
},
|
|
{
|
|
"epoch": 5.010672358591249,
|
|
"grad_norm": 0.32574966258870014,
|
|
"learning_rate": 7.372096677417652e-05,
|
|
"loss": 0.4106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41071006655693054,
|
|
"step": 4695,
|
|
"valid_targets_mean": 28954.2,
|
|
"valid_targets_min": 22117
|
|
},
|
|
{
|
|
"epoch": 5.016008537886873,
|
|
"grad_norm": 0.3246810617001424,
|
|
"learning_rate": 7.349637360283912e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.410560667514801,
|
|
"step": 4700,
|
|
"valid_targets_mean": 28753.2,
|
|
"valid_targets_min": 22469
|
|
},
|
|
{
|
|
"epoch": 5.021344717182497,
|
|
"grad_norm": 0.32843109233372075,
|
|
"learning_rate": 7.327192412998531e-05,
|
|
"loss": 0.4119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4146524667739868,
|
|
"step": 4705,
|
|
"valid_targets_mean": 28796.6,
|
|
"valid_targets_min": 23038
|
|
},
|
|
{
|
|
"epoch": 5.026680896478122,
|
|
"grad_norm": 0.32243355206822194,
|
|
"learning_rate": 7.304761957254468e-05,
|
|
"loss": 0.4126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41325482726097107,
|
|
"step": 4710,
|
|
"valid_targets_mean": 28702.6,
|
|
"valid_targets_min": 18678
|
|
},
|
|
{
|
|
"epoch": 5.032017075773746,
|
|
"grad_norm": 0.30617066599895376,
|
|
"learning_rate": 7.282346114666125e-05,
|
|
"loss": 0.4116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41367843747138977,
|
|
"step": 4715,
|
|
"valid_targets_mean": 28808.5,
|
|
"valid_targets_min": 21490
|
|
},
|
|
{
|
|
"epoch": 5.0373532550693705,
|
|
"grad_norm": 0.2622483401127021,
|
|
"learning_rate": 7.259945006768661e-05,
|
|
"loss": 0.4102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40470439195632935,
|
|
"step": 4720,
|
|
"valid_targets_mean": 28830.8,
|
|
"valid_targets_min": 20938
|
|
},
|
|
{
|
|
"epoch": 5.042689434364995,
|
|
"grad_norm": 0.24084554811633835,
|
|
"learning_rate": 7.237558755017358e-05,
|
|
"loss": 0.4132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41252872347831726,
|
|
"step": 4725,
|
|
"valid_targets_mean": 28893.2,
|
|
"valid_targets_min": 21118
|
|
},
|
|
{
|
|
"epoch": 5.048025613660619,
|
|
"grad_norm": 0.3412153342192512,
|
|
"learning_rate": 7.215187480786934e-05,
|
|
"loss": 0.4111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41102367639541626,
|
|
"step": 4730,
|
|
"valid_targets_mean": 28947.7,
|
|
"valid_targets_min": 19871
|
|
},
|
|
{
|
|
"epoch": 5.0533617929562435,
|
|
"grad_norm": 0.25635523803675986,
|
|
"learning_rate": 7.192831305370913e-05,
|
|
"loss": 0.4114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4131872057914734,
|
|
"step": 4735,
|
|
"valid_targets_mean": 28995.2,
|
|
"valid_targets_min": 20824
|
|
},
|
|
{
|
|
"epoch": 5.058697972251868,
|
|
"grad_norm": 0.3469477127452649,
|
|
"learning_rate": 7.170490349980961e-05,
|
|
"loss": 0.4105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4078807830810547,
|
|
"step": 4740,
|
|
"valid_targets_mean": 28730.4,
|
|
"valid_targets_min": 19013
|
|
},
|
|
{
|
|
"epoch": 5.064034151547492,
|
|
"grad_norm": 0.31461141347673727,
|
|
"learning_rate": 7.148164735746206e-05,
|
|
"loss": 0.4103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41446444392204285,
|
|
"step": 4745,
|
|
"valid_targets_mean": 28861.2,
|
|
"valid_targets_min": 21387
|
|
},
|
|
{
|
|
"epoch": 5.069370330843117,
|
|
"grad_norm": 0.29660186802360977,
|
|
"learning_rate": 7.125854583712615e-05,
|
|
"loss": 0.4096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4084359407424927,
|
|
"step": 4750,
|
|
"valid_targets_mean": 29104.8,
|
|
"valid_targets_min": 22716
|
|
},
|
|
{
|
|
"epoch": 5.074706510138741,
|
|
"grad_norm": 0.3010971793356668,
|
|
"learning_rate": 7.103560014842308e-05,
|
|
"loss": 0.4122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4073614180088043,
|
|
"step": 4755,
|
|
"valid_targets_mean": 28881.0,
|
|
"valid_targets_min": 22729
|
|
},
|
|
{
|
|
"epoch": 5.080042689434365,
|
|
"grad_norm": 0.2862444691931529,
|
|
"learning_rate": 7.08128115001293e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4098625183105469,
|
|
"step": 4760,
|
|
"valid_targets_mean": 28961.6,
|
|
"valid_targets_min": 19299
|
|
},
|
|
{
|
|
"epoch": 5.08537886872999,
|
|
"grad_norm": 0.25020308493368776,
|
|
"learning_rate": 7.059018110016973e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4086630642414093,
|
|
"step": 4765,
|
|
"valid_targets_mean": 29057.1,
|
|
"valid_targets_min": 22094
|
|
},
|
|
{
|
|
"epoch": 5.090715048025614,
|
|
"grad_norm": 0.27847527159225727,
|
|
"learning_rate": 7.036771015561121e-05,
|
|
"loss": 0.4126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4140252470970154,
|
|
"step": 4770,
|
|
"valid_targets_mean": 28867.9,
|
|
"valid_targets_min": 21388
|
|
},
|
|
{
|
|
"epoch": 5.096051227321238,
|
|
"grad_norm": 0.3011903014699775,
|
|
"learning_rate": 7.01453998726562e-05,
|
|
"loss": 0.4094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.411713182926178,
|
|
"step": 4775,
|
|
"valid_targets_mean": 28863.7,
|
|
"valid_targets_min": 20348
|
|
},
|
|
{
|
|
"epoch": 5.101387406616863,
|
|
"grad_norm": 0.30730661666666664,
|
|
"learning_rate": 6.992325145663598e-05,
|
|
"loss": 0.4123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4111214280128479,
|
|
"step": 4780,
|
|
"valid_targets_mean": 28753.7,
|
|
"valid_targets_min": 22883
|
|
},
|
|
{
|
|
"epoch": 5.106723585912487,
|
|
"grad_norm": 0.32196774097231146,
|
|
"learning_rate": 6.970126611200427e-05,
|
|
"loss": 0.4105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41407275199890137,
|
|
"step": 4785,
|
|
"valid_targets_mean": 28852.9,
|
|
"valid_targets_min": 20224
|
|
},
|
|
{
|
|
"epoch": 5.112059765208111,
|
|
"grad_norm": 0.30632694717862874,
|
|
"learning_rate": 6.947944504233056e-05,
|
|
"loss": 0.4126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41604679822921753,
|
|
"step": 4790,
|
|
"valid_targets_mean": 28705.6,
|
|
"valid_targets_min": 19229
|
|
},
|
|
{
|
|
"epoch": 5.117395944503735,
|
|
"grad_norm": 0.253831090174983,
|
|
"learning_rate": 6.925778945029372e-05,
|
|
"loss": 0.4113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4120824933052063,
|
|
"step": 4795,
|
|
"valid_targets_mean": 28878.8,
|
|
"valid_targets_min": 22820
|
|
},
|
|
{
|
|
"epoch": 5.122732123799359,
|
|
"grad_norm": 0.3456780929737466,
|
|
"learning_rate": 6.903630053767547e-05,
|
|
"loss": 0.4123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4113757014274597,
|
|
"step": 4800,
|
|
"valid_targets_mean": 28891.6,
|
|
"valid_targets_min": 19016
|
|
},
|
|
{
|
|
"epoch": 5.128068303094984,
|
|
"grad_norm": 0.3165719394775509,
|
|
"learning_rate": 6.881497950535372e-05,
|
|
"loss": 0.4122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41525954008102417,
|
|
"step": 4805,
|
|
"valid_targets_mean": 28823.5,
|
|
"valid_targets_min": 19620
|
|
},
|
|
{
|
|
"epoch": 5.133404482390608,
|
|
"grad_norm": 0.2661781179345722,
|
|
"learning_rate": 6.859382755329623e-05,
|
|
"loss": 0.4118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4119814336299896,
|
|
"step": 4810,
|
|
"valid_targets_mean": 28951.9,
|
|
"valid_targets_min": 21979
|
|
},
|
|
{
|
|
"epoch": 5.138740661686232,
|
|
"grad_norm": 0.3053995831655596,
|
|
"learning_rate": 6.837284588055401e-05,
|
|
"loss": 0.4135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41530489921569824,
|
|
"step": 4815,
|
|
"valid_targets_mean": 28814.4,
|
|
"valid_targets_min": 22303
|
|
},
|
|
{
|
|
"epoch": 5.144076840981857,
|
|
"grad_norm": 0.2985414542902706,
|
|
"learning_rate": 6.81520356852549e-05,
|
|
"loss": 0.4103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4107014536857605,
|
|
"step": 4820,
|
|
"valid_targets_mean": 28636.6,
|
|
"valid_targets_min": 19438
|
|
},
|
|
{
|
|
"epoch": 5.149413020277481,
|
|
"grad_norm": 0.27395231323939345,
|
|
"learning_rate": 6.793139816459688e-05,
|
|
"loss": 0.4116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41278764605522156,
|
|
"step": 4825,
|
|
"valid_targets_mean": 28825.8,
|
|
"valid_targets_min": 22834
|
|
},
|
|
{
|
|
"epoch": 5.154749199573105,
|
|
"grad_norm": 0.2815876337160247,
|
|
"learning_rate": 6.771093451484184e-05,
|
|
"loss": 0.411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4093128740787506,
|
|
"step": 4830,
|
|
"valid_targets_mean": 28861.8,
|
|
"valid_targets_min": 20543
|
|
},
|
|
{
|
|
"epoch": 5.16008537886873,
|
|
"grad_norm": 0.2577392190350307,
|
|
"learning_rate": 6.749064593130896e-05,
|
|
"loss": 0.4116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4116304814815521,
|
|
"step": 4835,
|
|
"valid_targets_mean": 28712.0,
|
|
"valid_targets_min": 21420
|
|
},
|
|
{
|
|
"epoch": 5.165421558164354,
|
|
"grad_norm": 0.32451201108936994,
|
|
"learning_rate": 6.727053360836813e-05,
|
|
"loss": 0.4125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4091717004776001,
|
|
"step": 4840,
|
|
"valid_targets_mean": 28862.9,
|
|
"valid_targets_min": 19087
|
|
},
|
|
{
|
|
"epoch": 5.1707577374599785,
|
|
"grad_norm": 0.26628464480777275,
|
|
"learning_rate": 6.705059873943375e-05,
|
|
"loss": 0.4103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41106921434402466,
|
|
"step": 4845,
|
|
"valid_targets_mean": 28710.1,
|
|
"valid_targets_min": 16993
|
|
},
|
|
{
|
|
"epoch": 5.176093916755603,
|
|
"grad_norm": 0.2698354108808259,
|
|
"learning_rate": 6.683084251695794e-05,
|
|
"loss": 0.4085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4063991904258728,
|
|
"step": 4850,
|
|
"valid_targets_mean": 28867.3,
|
|
"valid_targets_min": 22244
|
|
},
|
|
{
|
|
"epoch": 5.181430096051227,
|
|
"grad_norm": 0.28447317240929537,
|
|
"learning_rate": 6.66112661324243e-05,
|
|
"loss": 0.4125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4150131046772003,
|
|
"step": 4855,
|
|
"valid_targets_mean": 28886.2,
|
|
"valid_targets_min": 18504
|
|
},
|
|
{
|
|
"epoch": 5.1867662753468515,
|
|
"grad_norm": 0.3206606983392642,
|
|
"learning_rate": 6.639187077634141e-05,
|
|
"loss": 0.4113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41379764676094055,
|
|
"step": 4860,
|
|
"valid_targets_mean": 28757.9,
|
|
"valid_targets_min": 21961
|
|
},
|
|
{
|
|
"epoch": 5.192102454642476,
|
|
"grad_norm": 0.2944187342507382,
|
|
"learning_rate": 6.617265763823623e-05,
|
|
"loss": 0.4118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.410386860370636,
|
|
"step": 4865,
|
|
"valid_targets_mean": 28685.1,
|
|
"valid_targets_min": 21053
|
|
},
|
|
{
|
|
"epoch": 5.1974386339381,
|
|
"grad_norm": 0.31442256997337714,
|
|
"learning_rate": 6.59536279066479e-05,
|
|
"loss": 0.4115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40691661834716797,
|
|
"step": 4870,
|
|
"valid_targets_mean": 28880.3,
|
|
"valid_targets_min": 20361
|
|
},
|
|
{
|
|
"epoch": 5.202774813233725,
|
|
"grad_norm": 0.28243262362895716,
|
|
"learning_rate": 6.573478276912102e-05,
|
|
"loss": 0.4098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40908265113830566,
|
|
"step": 4875,
|
|
"valid_targets_mean": 28886.4,
|
|
"valid_targets_min": 22533
|
|
},
|
|
{
|
|
"epoch": 5.208110992529349,
|
|
"grad_norm": 0.29137903655336894,
|
|
"learning_rate": 6.551612341219951e-05,
|
|
"loss": 0.4104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4124106168746948,
|
|
"step": 4880,
|
|
"valid_targets_mean": 28826.8,
|
|
"valid_targets_min": 21510
|
|
},
|
|
{
|
|
"epoch": 5.213447171824973,
|
|
"grad_norm": 0.3062013407968183,
|
|
"learning_rate": 6.529765102141987e-05,
|
|
"loss": 0.4085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4086436927318573,
|
|
"step": 4885,
|
|
"valid_targets_mean": 28920.0,
|
|
"valid_targets_min": 21965
|
|
},
|
|
{
|
|
"epoch": 5.218783351120598,
|
|
"grad_norm": 0.30435542062456195,
|
|
"learning_rate": 6.507936678130493e-05,
|
|
"loss": 0.4107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4137675166130066,
|
|
"step": 4890,
|
|
"valid_targets_mean": 28880.1,
|
|
"valid_targets_min": 22355
|
|
},
|
|
{
|
|
"epoch": 5.224119530416222,
|
|
"grad_norm": 0.3029459152474519,
|
|
"learning_rate": 6.486127187535749e-05,
|
|
"loss": 0.4107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41013699769973755,
|
|
"step": 4895,
|
|
"valid_targets_mean": 28795.0,
|
|
"valid_targets_min": 19807
|
|
},
|
|
{
|
|
"epoch": 5.229455709711846,
|
|
"grad_norm": 0.27318022386491375,
|
|
"learning_rate": 6.464336748605368e-05,
|
|
"loss": 0.4121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4109240770339966,
|
|
"step": 4900,
|
|
"valid_targets_mean": 28784.6,
|
|
"valid_targets_min": 22323
|
|
},
|
|
{
|
|
"epoch": 5.234791889007471,
|
|
"grad_norm": 0.28034368378085794,
|
|
"learning_rate": 6.44256547948368e-05,
|
|
"loss": 0.4103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4095957577228546,
|
|
"step": 4905,
|
|
"valid_targets_mean": 28739.1,
|
|
"valid_targets_min": 22580
|
|
},
|
|
{
|
|
"epoch": 5.240128068303095,
|
|
"grad_norm": 0.28199830261418546,
|
|
"learning_rate": 6.420813498211071e-05,
|
|
"loss": 0.4094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4090818464756012,
|
|
"step": 4910,
|
|
"valid_targets_mean": 28894.7,
|
|
"valid_targets_min": 18300
|
|
},
|
|
{
|
|
"epoch": 5.245464247598719,
|
|
"grad_norm": 0.27836654922584375,
|
|
"learning_rate": 6.399080922723352e-05,
|
|
"loss": 0.4098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41042056679725647,
|
|
"step": 4915,
|
|
"valid_targets_mean": 28878.4,
|
|
"valid_targets_min": 21215
|
|
},
|
|
{
|
|
"epoch": 5.250800426894344,
|
|
"grad_norm": 0.24496887304740644,
|
|
"learning_rate": 6.377367870851124e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41321632266044617,
|
|
"step": 4920,
|
|
"valid_targets_mean": 28918.7,
|
|
"valid_targets_min": 22650
|
|
},
|
|
{
|
|
"epoch": 5.256136606189968,
|
|
"grad_norm": 0.26396913584127146,
|
|
"learning_rate": 6.355674460319128e-05,
|
|
"loss": 0.4091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4113008975982666,
|
|
"step": 4925,
|
|
"valid_targets_mean": 28809.0,
|
|
"valid_targets_min": 21227
|
|
},
|
|
{
|
|
"epoch": 5.2614727854855925,
|
|
"grad_norm": 0.3216499292759843,
|
|
"learning_rate": 6.334000808745617e-05,
|
|
"loss": 0.41,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40966159105300903,
|
|
"step": 4930,
|
|
"valid_targets_mean": 29048.1,
|
|
"valid_targets_min": 21325
|
|
},
|
|
{
|
|
"epoch": 5.266808964781217,
|
|
"grad_norm": 0.2518698461231176,
|
|
"learning_rate": 6.312347033641712e-05,
|
|
"loss": 0.409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40684419870376587,
|
|
"step": 4935,
|
|
"valid_targets_mean": 29002.1,
|
|
"valid_targets_min": 23299
|
|
},
|
|
{
|
|
"epoch": 5.272145144076841,
|
|
"grad_norm": 0.26439597268229703,
|
|
"learning_rate": 6.290713252410772e-05,
|
|
"loss": 0.4097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4131563901901245,
|
|
"step": 4940,
|
|
"valid_targets_mean": 28740.8,
|
|
"valid_targets_min": 20226
|
|
},
|
|
{
|
|
"epoch": 5.277481323372466,
|
|
"grad_norm": 0.2482891922972573,
|
|
"learning_rate": 6.269099582347743e-05,
|
|
"loss": 0.4096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4077172875404358,
|
|
"step": 4945,
|
|
"valid_targets_mean": 28777.4,
|
|
"valid_targets_min": 20923
|
|
},
|
|
{
|
|
"epoch": 5.28281750266809,
|
|
"grad_norm": 0.2901076069620523,
|
|
"learning_rate": 6.247506140638535e-05,
|
|
"loss": 0.4104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41215550899505615,
|
|
"step": 4950,
|
|
"valid_targets_mean": 28667.8,
|
|
"valid_targets_min": 17866
|
|
},
|
|
{
|
|
"epoch": 5.288153681963714,
|
|
"grad_norm": 0.2588091918678108,
|
|
"learning_rate": 6.225933044359389e-05,
|
|
"loss": 0.4117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40798309445381165,
|
|
"step": 4955,
|
|
"valid_targets_mean": 28690.2,
|
|
"valid_targets_min": 19451
|
|
},
|
|
{
|
|
"epoch": 5.293489861259339,
|
|
"grad_norm": 0.32159144819204216,
|
|
"learning_rate": 6.204380410476227e-05,
|
|
"loss": 0.4103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4119293689727783,
|
|
"step": 4960,
|
|
"valid_targets_mean": 28846.5,
|
|
"valid_targets_min": 15600
|
|
},
|
|
{
|
|
"epoch": 5.298826040554963,
|
|
"grad_norm": 0.24255140450372784,
|
|
"learning_rate": 6.182848355844035e-05,
|
|
"loss": 0.4094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4105904996395111,
|
|
"step": 4965,
|
|
"valid_targets_mean": 28667.0,
|
|
"valid_targets_min": 21401
|
|
},
|
|
{
|
|
"epoch": 5.304162219850587,
|
|
"grad_norm": 0.27448018537267005,
|
|
"learning_rate": 6.161336997206222e-05,
|
|
"loss": 0.4109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4086799621582031,
|
|
"step": 4970,
|
|
"valid_targets_mean": 28726.4,
|
|
"valid_targets_min": 20804
|
|
},
|
|
{
|
|
"epoch": 5.309498399146212,
|
|
"grad_norm": 0.2566682976560549,
|
|
"learning_rate": 6.13984645119397e-05,
|
|
"loss": 0.4096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40948301553726196,
|
|
"step": 4975,
|
|
"valid_targets_mean": 28890.5,
|
|
"valid_targets_min": 19729
|
|
},
|
|
{
|
|
"epoch": 5.314834578441836,
|
|
"grad_norm": 0.2594204239198475,
|
|
"learning_rate": 6.118376834325645e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4136618971824646,
|
|
"step": 4980,
|
|
"valid_targets_mean": 28906.8,
|
|
"valid_targets_min": 22746
|
|
},
|
|
{
|
|
"epoch": 5.3201707577374595,
|
|
"grad_norm": 0.2925635711676992,
|
|
"learning_rate": 6.0969282630061165e-05,
|
|
"loss": 0.4104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4110929071903229,
|
|
"step": 4985,
|
|
"valid_targets_mean": 28713.1,
|
|
"valid_targets_min": 22006
|
|
},
|
|
{
|
|
"epoch": 5.325506937033085,
|
|
"grad_norm": 0.3313501113694902,
|
|
"learning_rate": 6.0755008535261626e-05,
|
|
"loss": 0.4125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4129217863082886,
|
|
"step": 4990,
|
|
"valid_targets_mean": 28719.1,
|
|
"valid_targets_min": 19168
|
|
},
|
|
{
|
|
"epoch": 5.330843116328708,
|
|
"grad_norm": 0.2798300156894228,
|
|
"learning_rate": 6.054094722061812e-05,
|
|
"loss": 0.411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4137015640735626,
|
|
"step": 4995,
|
|
"valid_targets_mean": 28776.3,
|
|
"valid_targets_min": 22377
|
|
},
|
|
{
|
|
"epoch": 5.336179295624333,
|
|
"grad_norm": 0.26481226416695713,
|
|
"learning_rate": 6.032709984673739e-05,
|
|
"loss": 0.409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40870779752731323,
|
|
"step": 5000,
|
|
"valid_targets_mean": 28733.4,
|
|
"valid_targets_min": 16381
|
|
},
|
|
{
|
|
"epoch": 5.341515474919957,
|
|
"grad_norm": 0.2574571665394944,
|
|
"learning_rate": 6.0113467573066174e-05,
|
|
"loss": 0.4068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4053928852081299,
|
|
"step": 5005,
|
|
"valid_targets_mean": 28782.5,
|
|
"valid_targets_min": 21917
|
|
},
|
|
{
|
|
"epoch": 5.346851654215581,
|
|
"grad_norm": 0.2719571419049441,
|
|
"learning_rate": 5.990005155788495e-05,
|
|
"loss": 0.4106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41064536571502686,
|
|
"step": 5010,
|
|
"valid_targets_mean": 28818.9,
|
|
"valid_targets_min": 20918
|
|
},
|
|
{
|
|
"epoch": 5.352187833511206,
|
|
"grad_norm": 0.26204971312606534,
|
|
"learning_rate": 5.9686852958301765e-05,
|
|
"loss": 0.4085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41017162799835205,
|
|
"step": 5015,
|
|
"valid_targets_mean": 28822.9,
|
|
"valid_targets_min": 21199
|
|
},
|
|
{
|
|
"epoch": 5.35752401280683,
|
|
"grad_norm": 0.26277915538565405,
|
|
"learning_rate": 5.9473872930245745e-05,
|
|
"loss": 0.409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.407803475856781,
|
|
"step": 5020,
|
|
"valid_targets_mean": 28851.6,
|
|
"valid_targets_min": 20387
|
|
},
|
|
{
|
|
"epoch": 5.362860192102454,
|
|
"grad_norm": 0.2532508057206155,
|
|
"learning_rate": 5.9261112628461056e-05,
|
|
"loss": 0.4078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4040972590446472,
|
|
"step": 5025,
|
|
"valid_targets_mean": 28807.4,
|
|
"valid_targets_min": 20681
|
|
},
|
|
{
|
|
"epoch": 5.368196371398079,
|
|
"grad_norm": 0.26826146816164903,
|
|
"learning_rate": 5.9048573206500536e-05,
|
|
"loss": 0.4089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4086844027042389,
|
|
"step": 5030,
|
|
"valid_targets_mean": 28767.2,
|
|
"valid_targets_min": 17033
|
|
},
|
|
{
|
|
"epoch": 5.373532550693703,
|
|
"grad_norm": 0.2732203900470291,
|
|
"learning_rate": 5.883625581671934e-05,
|
|
"loss": 0.4076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40953680872917175,
|
|
"step": 5035,
|
|
"valid_targets_mean": 28861.1,
|
|
"valid_targets_min": 20183
|
|
},
|
|
{
|
|
"epoch": 5.378868729989327,
|
|
"grad_norm": 0.26014816155601717,
|
|
"learning_rate": 5.8624161610268956e-05,
|
|
"loss": 0.4089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40767329931259155,
|
|
"step": 5040,
|
|
"valid_targets_mean": 28843.2,
|
|
"valid_targets_min": 18915
|
|
},
|
|
{
|
|
"epoch": 5.384204909284952,
|
|
"grad_norm": 0.22818084752138812,
|
|
"learning_rate": 5.8412291737090664e-05,
|
|
"loss": 0.4068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40318650007247925,
|
|
"step": 5045,
|
|
"valid_targets_mean": 28832.0,
|
|
"valid_targets_min": 22102
|
|
},
|
|
{
|
|
"epoch": 5.389541088580576,
|
|
"grad_norm": 0.27327260028041966,
|
|
"learning_rate": 5.820064734590959e-05,
|
|
"loss": 0.408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4099733233451843,
|
|
"step": 5050,
|
|
"valid_targets_mean": 28704.1,
|
|
"valid_targets_min": 19957
|
|
},
|
|
{
|
|
"epoch": 5.3948772678762005,
|
|
"grad_norm": 0.28859428097531076,
|
|
"learning_rate": 5.798922958422819e-05,
|
|
"loss": 0.4084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4103071987628937,
|
|
"step": 5055,
|
|
"valid_targets_mean": 28830.3,
|
|
"valid_targets_min": 21361
|
|
},
|
|
{
|
|
"epoch": 5.400213447171825,
|
|
"grad_norm": 0.22978650256782315,
|
|
"learning_rate": 5.777803959832029e-05,
|
|
"loss": 0.4092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4068617820739746,
|
|
"step": 5060,
|
|
"valid_targets_mean": 28820.3,
|
|
"valid_targets_min": 22322
|
|
},
|
|
{
|
|
"epoch": 5.405549626467449,
|
|
"grad_norm": 0.27127959486835485,
|
|
"learning_rate": 5.756707853322466e-05,
|
|
"loss": 0.4084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40688830614089966,
|
|
"step": 5065,
|
|
"valid_targets_mean": 28791.6,
|
|
"valid_targets_min": 21860
|
|
},
|
|
{
|
|
"epoch": 5.4108858057630735,
|
|
"grad_norm": 0.2506525033173379,
|
|
"learning_rate": 5.7356347532738906e-05,
|
|
"loss": 0.4087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41265010833740234,
|
|
"step": 5070,
|
|
"valid_targets_mean": 28704.1,
|
|
"valid_targets_min": 20010
|
|
},
|
|
{
|
|
"epoch": 5.416221985058698,
|
|
"grad_norm": 0.2751486277336242,
|
|
"learning_rate": 5.714584773941336e-05,
|
|
"loss": 0.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41021281480789185,
|
|
"step": 5075,
|
|
"valid_targets_mean": 28805.6,
|
|
"valid_targets_min": 20428
|
|
},
|
|
{
|
|
"epoch": 5.421558164354322,
|
|
"grad_norm": 0.24266350069275283,
|
|
"learning_rate": 5.693558029454458e-05,
|
|
"loss": 0.4098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4110463857650757,
|
|
"step": 5080,
|
|
"valid_targets_mean": 28593.4,
|
|
"valid_targets_min": 20891
|
|
},
|
|
{
|
|
"epoch": 5.426894343649947,
|
|
"grad_norm": 0.27938597765654943,
|
|
"learning_rate": 5.6725546338169675e-05,
|
|
"loss": 0.4084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40841275453567505,
|
|
"step": 5085,
|
|
"valid_targets_mean": 29122.6,
|
|
"valid_targets_min": 19030
|
|
},
|
|
{
|
|
"epoch": 5.432230522945571,
|
|
"grad_norm": 0.2895610586627878,
|
|
"learning_rate": 5.651574700905955e-05,
|
|
"loss": 0.4075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40801024436950684,
|
|
"step": 5090,
|
|
"valid_targets_mean": 28741.1,
|
|
"valid_targets_min": 21502
|
|
},
|
|
{
|
|
"epoch": 5.437566702241195,
|
|
"grad_norm": 0.2427061435506355,
|
|
"learning_rate": 5.6306183444713066e-05,
|
|
"loss": 0.407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4059031903743744,
|
|
"step": 5095,
|
|
"valid_targets_mean": 28847.6,
|
|
"valid_targets_min": 22709
|
|
},
|
|
{
|
|
"epoch": 5.44290288153682,
|
|
"grad_norm": 0.2779281039684238,
|
|
"learning_rate": 5.609685678135089e-05,
|
|
"loss": 0.4078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4095008969306946,
|
|
"step": 5100,
|
|
"valid_targets_mean": 28784.6,
|
|
"valid_targets_min": 19776
|
|
},
|
|
{
|
|
"epoch": 5.448239060832444,
|
|
"grad_norm": 0.26779235200255996,
|
|
"learning_rate": 5.588776815390913e-05,
|
|
"loss": 0.4082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40894991159439087,
|
|
"step": 5105,
|
|
"valid_targets_mean": 28831.8,
|
|
"valid_targets_min": 23168
|
|
},
|
|
{
|
|
"epoch": 5.453575240128068,
|
|
"grad_norm": 0.2838087294033303,
|
|
"learning_rate": 5.5678918696033355e-05,
|
|
"loss": 0.4088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4082152843475342,
|
|
"step": 5110,
|
|
"valid_targets_mean": 28729.3,
|
|
"valid_targets_min": 18829
|
|
},
|
|
{
|
|
"epoch": 5.458911419423693,
|
|
"grad_norm": 0.2305536584413741,
|
|
"learning_rate": 5.5470309540072443e-05,
|
|
"loss": 0.406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.402679443359375,
|
|
"step": 5115,
|
|
"valid_targets_mean": 28995.2,
|
|
"valid_targets_min": 21516
|
|
},
|
|
{
|
|
"epoch": 5.464247598719317,
|
|
"grad_norm": 0.22816329039508723,
|
|
"learning_rate": 5.526194181707236e-05,
|
|
"loss": 0.408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4087204039096832,
|
|
"step": 5120,
|
|
"valid_targets_mean": 28795.0,
|
|
"valid_targets_min": 19489
|
|
},
|
|
{
|
|
"epoch": 5.4695837780149414,
|
|
"grad_norm": 0.24529677775400807,
|
|
"learning_rate": 5.505381665677001e-05,
|
|
"loss": 0.4092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4101272225379944,
|
|
"step": 5125,
|
|
"valid_targets_mean": 28636.4,
|
|
"valid_targets_min": 16244
|
|
},
|
|
{
|
|
"epoch": 5.474919957310566,
|
|
"grad_norm": 0.28571712158040247,
|
|
"learning_rate": 5.484593518758716e-05,
|
|
"loss": 0.41,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4107014238834381,
|
|
"step": 5130,
|
|
"valid_targets_mean": 28694.7,
|
|
"valid_targets_min": 22274
|
|
},
|
|
{
|
|
"epoch": 5.48025613660619,
|
|
"grad_norm": 0.24596211486579148,
|
|
"learning_rate": 5.4638298536624445e-05,
|
|
"loss": 0.409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40564632415771484,
|
|
"step": 5135,
|
|
"valid_targets_mean": 28841.2,
|
|
"valid_targets_min": 21563
|
|
},
|
|
{
|
|
"epoch": 5.4855923159018145,
|
|
"grad_norm": 0.26465991092206825,
|
|
"learning_rate": 5.443090782965496e-05,
|
|
"loss": 0.4092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4095415472984314,
|
|
"step": 5140,
|
|
"valid_targets_mean": 28906.0,
|
|
"valid_targets_min": 21832
|
|
},
|
|
{
|
|
"epoch": 5.490928495197439,
|
|
"grad_norm": 0.22267519414163728,
|
|
"learning_rate": 5.422376419111848e-05,
|
|
"loss": 0.4079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40773892402648926,
|
|
"step": 5145,
|
|
"valid_targets_mean": 28854.4,
|
|
"valid_targets_min": 22257
|
|
},
|
|
{
|
|
"epoch": 5.496264674493063,
|
|
"grad_norm": 0.2808312854307953,
|
|
"learning_rate": 5.401686874411516e-05,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40362006425857544,
|
|
"step": 5150,
|
|
"valid_targets_mean": 28736.9,
|
|
"valid_targets_min": 18357
|
|
},
|
|
{
|
|
"epoch": 5.501600853788688,
|
|
"grad_norm": 0.28073780928417497,
|
|
"learning_rate": 5.381022261039944e-05,
|
|
"loss": 0.4089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4081991910934448,
|
|
"step": 5155,
|
|
"valid_targets_mean": 28902.0,
|
|
"valid_targets_min": 22967
|
|
},
|
|
{
|
|
"epoch": 5.506937033084312,
|
|
"grad_norm": 0.2631869993080492,
|
|
"learning_rate": 5.3603826910374153e-05,
|
|
"loss": 0.407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40714550018310547,
|
|
"step": 5160,
|
|
"valid_targets_mean": 28665.1,
|
|
"valid_targets_min": 21412
|
|
},
|
|
{
|
|
"epoch": 5.512273212379936,
|
|
"grad_norm": 0.2884581744505189,
|
|
"learning_rate": 5.3397682763084145e-05,
|
|
"loss": 0.4085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4094289541244507,
|
|
"step": 5165,
|
|
"valid_targets_mean": 28766.8,
|
|
"valid_targets_min": 19331
|
|
},
|
|
{
|
|
"epoch": 5.517609391675561,
|
|
"grad_norm": 0.2575293949311548,
|
|
"learning_rate": 5.319179128621053e-05,
|
|
"loss": 0.408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4074215888977051,
|
|
"step": 5170,
|
|
"valid_targets_mean": 28829.6,
|
|
"valid_targets_min": 20404
|
|
},
|
|
{
|
|
"epoch": 5.522945570971185,
|
|
"grad_norm": 0.2752574398921403,
|
|
"learning_rate": 5.2986153596064446e-05,
|
|
"loss": 0.4082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4028487801551819,
|
|
"step": 5175,
|
|
"valid_targets_mean": 28834.2,
|
|
"valid_targets_min": 22315
|
|
},
|
|
{
|
|
"epoch": 5.528281750266809,
|
|
"grad_norm": 0.26181590292808854,
|
|
"learning_rate": 5.278077080758106e-05,
|
|
"loss": 0.4063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40661585330963135,
|
|
"step": 5180,
|
|
"valid_targets_mean": 28683.8,
|
|
"valid_targets_min": 19502
|
|
},
|
|
{
|
|
"epoch": 5.533617929562434,
|
|
"grad_norm": 0.30026428350022577,
|
|
"learning_rate": 5.257564403431341e-05,
|
|
"loss": 0.4095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41059237718582153,
|
|
"step": 5185,
|
|
"valid_targets_mean": 28661.4,
|
|
"valid_targets_min": 20971
|
|
},
|
|
{
|
|
"epoch": 5.538954108858057,
|
|
"grad_norm": 0.33290674109160145,
|
|
"learning_rate": 5.23707743884265e-05,
|
|
"loss": 0.4076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40834152698516846,
|
|
"step": 5190,
|
|
"valid_targets_mean": 28679.2,
|
|
"valid_targets_min": 18819
|
|
},
|
|
{
|
|
"epoch": 5.544290288153682,
|
|
"grad_norm": 0.2451716687040915,
|
|
"learning_rate": 5.216616298069132e-05,
|
|
"loss": 0.4059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40883660316467285,
|
|
"step": 5195,
|
|
"valid_targets_mean": 28750.6,
|
|
"valid_targets_min": 21745
|
|
},
|
|
{
|
|
"epoch": 5.549626467449306,
|
|
"grad_norm": 0.2518337780712598,
|
|
"learning_rate": 5.196181092047858e-05,
|
|
"loss": 0.4065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4079820513725281,
|
|
"step": 5200,
|
|
"valid_targets_mean": 28827.4,
|
|
"valid_targets_min": 19230
|
|
},
|
|
{
|
|
"epoch": 5.554962646744931,
|
|
"grad_norm": 0.23555228050632473,
|
|
"learning_rate": 5.1757719315753e-05,
|
|
"loss": 0.4092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41063809394836426,
|
|
"step": 5205,
|
|
"valid_targets_mean": 28860.7,
|
|
"valid_targets_min": 20761
|
|
},
|
|
{
|
|
"epoch": 5.560298826040555,
|
|
"grad_norm": 0.23131626823494578,
|
|
"learning_rate": 5.155388927306707e-05,
|
|
"loss": 0.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40856459736824036,
|
|
"step": 5210,
|
|
"valid_targets_mean": 28866.7,
|
|
"valid_targets_min": 21514
|
|
},
|
|
{
|
|
"epoch": 5.565635005336179,
|
|
"grad_norm": 0.24583588381945762,
|
|
"learning_rate": 5.13503218975551e-05,
|
|
"loss": 0.4072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4081979990005493,
|
|
"step": 5215,
|
|
"valid_targets_mean": 28815.9,
|
|
"valid_targets_min": 22727
|
|
},
|
|
{
|
|
"epoch": 5.570971184631803,
|
|
"grad_norm": 0.26323488215272317,
|
|
"learning_rate": 5.1147018292927384e-05,
|
|
"loss": 0.4071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4045487344264984,
|
|
"step": 5220,
|
|
"valid_targets_mean": 28845.8,
|
|
"valid_targets_min": 22600
|
|
},
|
|
{
|
|
"epoch": 5.576307363927428,
|
|
"grad_norm": 0.2617473214116886,
|
|
"learning_rate": 5.094397956146391e-05,
|
|
"loss": 0.4079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40857842564582825,
|
|
"step": 5225,
|
|
"valid_targets_mean": 28835.9,
|
|
"valid_targets_min": 22871
|
|
},
|
|
{
|
|
"epoch": 5.581643543223052,
|
|
"grad_norm": 0.230237887341813,
|
|
"learning_rate": 5.074120680400877e-05,
|
|
"loss": 0.4058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4065649211406708,
|
|
"step": 5230,
|
|
"valid_targets_mean": 28784.2,
|
|
"valid_targets_min": 22150
|
|
},
|
|
{
|
|
"epoch": 5.586979722518676,
|
|
"grad_norm": 0.23212744439681787,
|
|
"learning_rate": 5.053870111996379e-05,
|
|
"loss": 0.4092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4103742241859436,
|
|
"step": 5235,
|
|
"valid_targets_mean": 28789.1,
|
|
"valid_targets_min": 22110
|
|
},
|
|
{
|
|
"epoch": 5.592315901814301,
|
|
"grad_norm": 0.2231287372517266,
|
|
"learning_rate": 5.0336463607282915e-05,
|
|
"loss": 0.4086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.411297082901001,
|
|
"step": 5240,
|
|
"valid_targets_mean": 28775.0,
|
|
"valid_targets_min": 21938
|
|
},
|
|
{
|
|
"epoch": 5.597652081109925,
|
|
"grad_norm": 0.2686855989256489,
|
|
"learning_rate": 5.013449536246607e-05,
|
|
"loss": 0.4071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.411526620388031,
|
|
"step": 5245,
|
|
"valid_targets_mean": 28883.7,
|
|
"valid_targets_min": 16759
|
|
},
|
|
{
|
|
"epoch": 5.602988260405549,
|
|
"grad_norm": 0.26466533071442333,
|
|
"learning_rate": 4.9932797480553174e-05,
|
|
"loss": 0.4054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40664201974868774,
|
|
"step": 5250,
|
|
"valid_targets_mean": 28914.4,
|
|
"valid_targets_min": 23391
|
|
},
|
|
{
|
|
"epoch": 5.608324439701174,
|
|
"grad_norm": 0.21338154387922556,
|
|
"learning_rate": 4.973137105511842e-05,
|
|
"loss": 0.4084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40618956089019775,
|
|
"step": 5255,
|
|
"valid_targets_mean": 28953.4,
|
|
"valid_targets_min": 22290
|
|
},
|
|
{
|
|
"epoch": 5.613660618996798,
|
|
"grad_norm": 0.22845809094870723,
|
|
"learning_rate": 4.9530217178264035e-05,
|
|
"loss": 0.4078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4072515368461609,
|
|
"step": 5260,
|
|
"valid_targets_mean": 28862.8,
|
|
"valid_targets_min": 20440
|
|
},
|
|
{
|
|
"epoch": 5.6189967982924225,
|
|
"grad_norm": 0.23349213346807032,
|
|
"learning_rate": 4.932933694061469e-05,
|
|
"loss": 0.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4063612222671509,
|
|
"step": 5265,
|
|
"valid_targets_mean": 28641.9,
|
|
"valid_targets_min": 13903
|
|
},
|
|
{
|
|
"epoch": 5.624332977588047,
|
|
"grad_norm": 0.2624107603135616,
|
|
"learning_rate": 4.912873143131137e-05,
|
|
"loss": 0.4079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40512439608573914,
|
|
"step": 5270,
|
|
"valid_targets_mean": 29003.0,
|
|
"valid_targets_min": 20872
|
|
},
|
|
{
|
|
"epoch": 5.629669156883671,
|
|
"grad_norm": 0.2226041520979999,
|
|
"learning_rate": 4.892840173800545e-05,
|
|
"loss": 0.4061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4093719720840454,
|
|
"step": 5275,
|
|
"valid_targets_mean": 29018.0,
|
|
"valid_targets_min": 18214
|
|
},
|
|
{
|
|
"epoch": 5.6350053361792956,
|
|
"grad_norm": 0.25335082989920943,
|
|
"learning_rate": 4.872834894685303e-05,
|
|
"loss": 0.4079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4051392674446106,
|
|
"step": 5280,
|
|
"valid_targets_mean": 28871.5,
|
|
"valid_targets_min": 21415
|
|
},
|
|
{
|
|
"epoch": 5.64034151547492,
|
|
"grad_norm": 0.26416237862885106,
|
|
"learning_rate": 4.852857414250871e-05,
|
|
"loss": 0.4072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4078800678253174,
|
|
"step": 5285,
|
|
"valid_targets_mean": 28953.2,
|
|
"valid_targets_min": 23044
|
|
},
|
|
{
|
|
"epoch": 5.645677694770544,
|
|
"grad_norm": 0.2594465825907819,
|
|
"learning_rate": 4.832907840812007e-05,
|
|
"loss": 0.4094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4099881052970886,
|
|
"step": 5290,
|
|
"valid_targets_mean": 29002.3,
|
|
"valid_targets_min": 23780
|
|
},
|
|
{
|
|
"epoch": 5.651013874066169,
|
|
"grad_norm": 0.25857051667325454,
|
|
"learning_rate": 4.812986282532144e-05,
|
|
"loss": 0.4074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40799394249916077,
|
|
"step": 5295,
|
|
"valid_targets_mean": 28782.3,
|
|
"valid_targets_min": 21471
|
|
},
|
|
{
|
|
"epoch": 5.656350053361793,
|
|
"grad_norm": 0.23165297682274635,
|
|
"learning_rate": 4.7930928474228345e-05,
|
|
"loss": 0.4072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4110855460166931,
|
|
"step": 5300,
|
|
"valid_targets_mean": 28889.3,
|
|
"valid_targets_min": 19959
|
|
},
|
|
{
|
|
"epoch": 5.661686232657417,
|
|
"grad_norm": 0.21714603686390757,
|
|
"learning_rate": 4.7732276433431514e-05,
|
|
"loss": 0.4079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4074994623661041,
|
|
"step": 5305,
|
|
"valid_targets_mean": 28760.8,
|
|
"valid_targets_min": 17978
|
|
},
|
|
{
|
|
"epoch": 5.667022411953042,
|
|
"grad_norm": 0.25121345612741014,
|
|
"learning_rate": 4.753390777999091e-05,
|
|
"loss": 0.4075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40740853548049927,
|
|
"step": 5310,
|
|
"valid_targets_mean": 28903.8,
|
|
"valid_targets_min": 18702
|
|
},
|
|
{
|
|
"epoch": 5.672358591248666,
|
|
"grad_norm": 0.24076860265064987,
|
|
"learning_rate": 4.733582358943016e-05,
|
|
"loss": 0.4086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4073009192943573,
|
|
"step": 5315,
|
|
"valid_targets_mean": 28919.6,
|
|
"valid_targets_min": 18070
|
|
},
|
|
{
|
|
"epoch": 5.67769477054429,
|
|
"grad_norm": 0.24410116917299898,
|
|
"learning_rate": 4.7138024935730464e-05,
|
|
"loss": 0.407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40609365701675415,
|
|
"step": 5320,
|
|
"valid_targets_mean": 28835.2,
|
|
"valid_targets_min": 22006
|
|
},
|
|
{
|
|
"epoch": 5.683030949839915,
|
|
"grad_norm": 0.22406370130790632,
|
|
"learning_rate": 4.6940512891324986e-05,
|
|
"loss": 0.4076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40902549028396606,
|
|
"step": 5325,
|
|
"valid_targets_mean": 28876.1,
|
|
"valid_targets_min": 17173
|
|
},
|
|
{
|
|
"epoch": 5.688367129135539,
|
|
"grad_norm": 0.2220167506558589,
|
|
"learning_rate": 4.674328852709283e-05,
|
|
"loss": 0.4062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4030117988586426,
|
|
"step": 5330,
|
|
"valid_targets_mean": 28941.9,
|
|
"valid_targets_min": 23033
|
|
},
|
|
{
|
|
"epoch": 5.6937033084311635,
|
|
"grad_norm": 0.2461889251597465,
|
|
"learning_rate": 4.654635291235343e-05,
|
|
"loss": 0.4064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40485891699790955,
|
|
"step": 5335,
|
|
"valid_targets_mean": 28843.3,
|
|
"valid_targets_min": 22189
|
|
},
|
|
{
|
|
"epoch": 5.699039487726788,
|
|
"grad_norm": 0.2868230592419842,
|
|
"learning_rate": 4.634970711486068e-05,
|
|
"loss": 0.4068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4057909846305847,
|
|
"step": 5340,
|
|
"valid_targets_mean": 28854.3,
|
|
"valid_targets_min": 22418
|
|
},
|
|
{
|
|
"epoch": 5.704375667022412,
|
|
"grad_norm": 0.24789472475723118,
|
|
"learning_rate": 4.615335220079703e-05,
|
|
"loss": 0.4055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4078221321105957,
|
|
"step": 5345,
|
|
"valid_targets_mean": 28942.5,
|
|
"valid_targets_min": 21262
|
|
},
|
|
{
|
|
"epoch": 5.7097118463180365,
|
|
"grad_norm": 0.25318073622340376,
|
|
"learning_rate": 4.595728923476792e-05,
|
|
"loss": 0.4069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40469861030578613,
|
|
"step": 5350,
|
|
"valid_targets_mean": 28845.2,
|
|
"valid_targets_min": 19004
|
|
},
|
|
{
|
|
"epoch": 5.715048025613661,
|
|
"grad_norm": 0.2580212397753865,
|
|
"learning_rate": 4.576151927979577e-05,
|
|
"loss": 0.4072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4108935594558716,
|
|
"step": 5355,
|
|
"valid_targets_mean": 28843.6,
|
|
"valid_targets_min": 19456
|
|
},
|
|
{
|
|
"epoch": 5.720384204909285,
|
|
"grad_norm": 0.22310778312178198,
|
|
"learning_rate": 4.556604339731445e-05,
|
|
"loss": 0.4078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41052567958831787,
|
|
"step": 5360,
|
|
"valid_targets_mean": 28791.6,
|
|
"valid_targets_min": 18590
|
|
},
|
|
{
|
|
"epoch": 5.72572038420491,
|
|
"grad_norm": 0.2681032818849847,
|
|
"learning_rate": 4.537086264716341e-05,
|
|
"loss": 0.4085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4068934917449951,
|
|
"step": 5365,
|
|
"valid_targets_mean": 28867.4,
|
|
"valid_targets_min": 23073
|
|
},
|
|
{
|
|
"epoch": 5.731056563500534,
|
|
"grad_norm": 0.23148769627141919,
|
|
"learning_rate": 4.517597808758183e-05,
|
|
"loss": 0.4054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4052630662918091,
|
|
"step": 5370,
|
|
"valid_targets_mean": 28865.1,
|
|
"valid_targets_min": 21222
|
|
},
|
|
{
|
|
"epoch": 5.736392742796158,
|
|
"grad_norm": 0.22042817501892076,
|
|
"learning_rate": 4.498139077520314e-05,
|
|
"loss": 0.4071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4036562442779541,
|
|
"step": 5375,
|
|
"valid_targets_mean": 28880.2,
|
|
"valid_targets_min": 19297
|
|
},
|
|
{
|
|
"epoch": 5.741728922091783,
|
|
"grad_norm": 0.23595064942007865,
|
|
"learning_rate": 4.478710176504898e-05,
|
|
"loss": 0.4061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4078393876552582,
|
|
"step": 5380,
|
|
"valid_targets_mean": 28741.1,
|
|
"valid_targets_min": 19445
|
|
},
|
|
{
|
|
"epoch": 5.747065101387407,
|
|
"grad_norm": 0.2356224677462118,
|
|
"learning_rate": 4.459311211052382e-05,
|
|
"loss": 0.4067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40955275297164917,
|
|
"step": 5385,
|
|
"valid_targets_mean": 28913.4,
|
|
"valid_targets_min": 22165
|
|
},
|
|
{
|
|
"epoch": 5.752401280683031,
|
|
"grad_norm": 0.23613025476452645,
|
|
"learning_rate": 4.43994228634089e-05,
|
|
"loss": 0.4058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4061693251132965,
|
|
"step": 5390,
|
|
"valid_targets_mean": 28761.6,
|
|
"valid_targets_min": 16988
|
|
},
|
|
{
|
|
"epoch": 5.757737459978655,
|
|
"grad_norm": 0.2509453674002872,
|
|
"learning_rate": 4.4206035073856834e-05,
|
|
"loss": 0.4079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41114407777786255,
|
|
"step": 5395,
|
|
"valid_targets_mean": 28747.3,
|
|
"valid_targets_min": 17862
|
|
},
|
|
{
|
|
"epoch": 5.76307363927428,
|
|
"grad_norm": 0.21299150498723463,
|
|
"learning_rate": 4.4012949790385736e-05,
|
|
"loss": 0.4044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40109744668006897,
|
|
"step": 5400,
|
|
"valid_targets_mean": 28773.6,
|
|
"valid_targets_min": 19732
|
|
},
|
|
{
|
|
"epoch": 5.7684098185699035,
|
|
"grad_norm": 0.24642753596991596,
|
|
"learning_rate": 4.382016805987351e-05,
|
|
"loss": 0.407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4072338044643402,
|
|
"step": 5405,
|
|
"valid_targets_mean": 28985.3,
|
|
"valid_targets_min": 21313
|
|
},
|
|
{
|
|
"epoch": 5.773745997865529,
|
|
"grad_norm": 0.22244687905504107,
|
|
"learning_rate": 4.362769092755237e-05,
|
|
"loss": 0.4061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40808022022247314,
|
|
"step": 5410,
|
|
"valid_targets_mean": 28903.3,
|
|
"valid_targets_min": 18221
|
|
},
|
|
{
|
|
"epoch": 5.779082177161152,
|
|
"grad_norm": 0.2634528950267717,
|
|
"learning_rate": 4.343551943700291e-05,
|
|
"loss": 0.4065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4094908833503723,
|
|
"step": 5415,
|
|
"valid_targets_mean": 28849.7,
|
|
"valid_targets_min": 20021
|
|
},
|
|
{
|
|
"epoch": 5.784418356456777,
|
|
"grad_norm": 0.21314653878241394,
|
|
"learning_rate": 4.324365463014871e-05,
|
|
"loss": 0.4065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4070976674556732,
|
|
"step": 5420,
|
|
"valid_targets_mean": 28839.5,
|
|
"valid_targets_min": 21489
|
|
},
|
|
{
|
|
"epoch": 5.789754535752401,
|
|
"grad_norm": 0.2333881981036085,
|
|
"learning_rate": 4.3052097547250504e-05,
|
|
"loss": 0.4068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40807241201400757,
|
|
"step": 5425,
|
|
"valid_targets_mean": 28919.2,
|
|
"valid_targets_min": 21215
|
|
},
|
|
{
|
|
"epoch": 5.795090715048025,
|
|
"grad_norm": 0.22877396245428053,
|
|
"learning_rate": 4.2860849226900545e-05,
|
|
"loss": 0.4052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40419960021972656,
|
|
"step": 5430,
|
|
"valid_targets_mean": 28771.1,
|
|
"valid_targets_min": 21422
|
|
},
|
|
{
|
|
"epoch": 5.80042689434365,
|
|
"grad_norm": 0.25836705289249745,
|
|
"learning_rate": 4.2669910706017124e-05,
|
|
"loss": 0.4066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4077407121658325,
|
|
"step": 5435,
|
|
"valid_targets_mean": 28994.6,
|
|
"valid_targets_min": 22465
|
|
},
|
|
{
|
|
"epoch": 5.805763073639274,
|
|
"grad_norm": 0.22522683738726615,
|
|
"learning_rate": 4.2479283019838743e-05,
|
|
"loss": 0.4044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.401361882686615,
|
|
"step": 5440,
|
|
"valid_targets_mean": 28920.0,
|
|
"valid_targets_min": 18753
|
|
},
|
|
{
|
|
"epoch": 5.811099252934898,
|
|
"grad_norm": 0.22685710627505415,
|
|
"learning_rate": 4.228896720191873e-05,
|
|
"loss": 0.4063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4064394235610962,
|
|
"step": 5445,
|
|
"valid_targets_mean": 29037.0,
|
|
"valid_targets_min": 19954
|
|
},
|
|
{
|
|
"epoch": 5.816435432230523,
|
|
"grad_norm": 0.20142130133877767,
|
|
"learning_rate": 4.209896428411936e-05,
|
|
"loss": 0.4047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40420037508010864,
|
|
"step": 5450,
|
|
"valid_targets_mean": 28870.6,
|
|
"valid_targets_min": 18563
|
|
},
|
|
{
|
|
"epoch": 5.821771611526147,
|
|
"grad_norm": 0.20801333433848126,
|
|
"learning_rate": 4.190927529660653e-05,
|
|
"loss": 0.4056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4038311243057251,
|
|
"step": 5455,
|
|
"valid_targets_mean": 28757.4,
|
|
"valid_targets_min": 22298
|
|
},
|
|
{
|
|
"epoch": 5.827107790821771,
|
|
"grad_norm": 0.23230643362340275,
|
|
"learning_rate": 4.1719901267844086e-05,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40489107370376587,
|
|
"step": 5460,
|
|
"valid_targets_mean": 28917.4,
|
|
"valid_targets_min": 21333
|
|
},
|
|
{
|
|
"epoch": 5.832443970117396,
|
|
"grad_norm": 0.2164401832868788,
|
|
"learning_rate": 4.153084322458806e-05,
|
|
"loss": 0.4038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4066731631755829,
|
|
"step": 5465,
|
|
"valid_targets_mean": 28922.4,
|
|
"valid_targets_min": 23138
|
|
},
|
|
{
|
|
"epoch": 5.83778014941302,
|
|
"grad_norm": 0.21951841350335208,
|
|
"learning_rate": 4.134210219188141e-05,
|
|
"loss": 0.405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40590178966522217,
|
|
"step": 5470,
|
|
"valid_targets_mean": 28929.7,
|
|
"valid_targets_min": 22171
|
|
},
|
|
{
|
|
"epoch": 5.8431163287086445,
|
|
"grad_norm": 0.27913810326540894,
|
|
"learning_rate": 4.115367919304822e-05,
|
|
"loss": 0.4063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40679121017456055,
|
|
"step": 5475,
|
|
"valid_targets_mean": 28965.1,
|
|
"valid_targets_min": 19535
|
|
},
|
|
{
|
|
"epoch": 5.848452508004269,
|
|
"grad_norm": 0.2114164051428383,
|
|
"learning_rate": 4.0965575249688336e-05,
|
|
"loss": 0.4056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.403158962726593,
|
|
"step": 5480,
|
|
"valid_targets_mean": 28941.9,
|
|
"valid_targets_min": 20187
|
|
},
|
|
{
|
|
"epoch": 5.853788687299893,
|
|
"grad_norm": 0.256277757621636,
|
|
"learning_rate": 4.077779138167161e-05,
|
|
"loss": 0.4087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4057876169681549,
|
|
"step": 5485,
|
|
"valid_targets_mean": 28857.4,
|
|
"valid_targets_min": 20301
|
|
},
|
|
{
|
|
"epoch": 5.859124866595518,
|
|
"grad_norm": 0.21061324841836357,
|
|
"learning_rate": 4.059032860713261e-05,
|
|
"loss": 0.4055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40722543001174927,
|
|
"step": 5490,
|
|
"valid_targets_mean": 28856.1,
|
|
"valid_targets_min": 19253
|
|
},
|
|
{
|
|
"epoch": 5.864461045891142,
|
|
"grad_norm": 0.2518110545376062,
|
|
"learning_rate": 4.040318794246497e-05,
|
|
"loss": 0.4054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40780001878738403,
|
|
"step": 5495,
|
|
"valid_targets_mean": 28901.4,
|
|
"valid_targets_min": 21836
|
|
},
|
|
{
|
|
"epoch": 5.869797225186766,
|
|
"grad_norm": 0.2123751768365746,
|
|
"learning_rate": 4.0216370402315796e-05,
|
|
"loss": 0.4066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40632835030555725,
|
|
"step": 5500,
|
|
"valid_targets_mean": 28909.4,
|
|
"valid_targets_min": 20239
|
|
},
|
|
{
|
|
"epoch": 5.875133404482391,
|
|
"grad_norm": 0.21594602205244454,
|
|
"learning_rate": 4.002987699958042e-05,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40827763080596924,
|
|
"step": 5505,
|
|
"valid_targets_mean": 28695.7,
|
|
"valid_targets_min": 18285
|
|
},
|
|
{
|
|
"epoch": 5.880469583778015,
|
|
"grad_norm": 0.21362367358291603,
|
|
"learning_rate": 3.984370874539657e-05,
|
|
"loss": 0.4033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4049973487854004,
|
|
"step": 5510,
|
|
"valid_targets_mean": 28791.7,
|
|
"valid_targets_min": 20225
|
|
},
|
|
{
|
|
"epoch": 5.885805763073639,
|
|
"grad_norm": 0.24153766266989785,
|
|
"learning_rate": 3.965786664913923e-05,
|
|
"loss": 0.4078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40766391158103943,
|
|
"step": 5515,
|
|
"valid_targets_mean": 28825.4,
|
|
"valid_targets_min": 22899
|
|
},
|
|
{
|
|
"epoch": 5.891141942369264,
|
|
"grad_norm": 0.23261290774247123,
|
|
"learning_rate": 3.947235171841497e-05,
|
|
"loss": 0.4055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40966296195983887,
|
|
"step": 5520,
|
|
"valid_targets_mean": 28849.6,
|
|
"valid_targets_min": 21756
|
|
},
|
|
{
|
|
"epoch": 5.896478121664888,
|
|
"grad_norm": 0.24097588461676905,
|
|
"learning_rate": 3.928716495905642e-05,
|
|
"loss": 0.4062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4064144492149353,
|
|
"step": 5525,
|
|
"valid_targets_mean": 29012.6,
|
|
"valid_targets_min": 20124
|
|
},
|
|
{
|
|
"epoch": 5.901814300960512,
|
|
"grad_norm": 0.21967799647204173,
|
|
"learning_rate": 3.9102307375117064e-05,
|
|
"loss": 0.4043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40806692838668823,
|
|
"step": 5530,
|
|
"valid_targets_mean": 28692.7,
|
|
"valid_targets_min": 21532
|
|
},
|
|
{
|
|
"epoch": 5.907150480256137,
|
|
"grad_norm": 0.21285101448089871,
|
|
"learning_rate": 3.891777996886551e-05,
|
|
"loss": 0.4033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4044090211391449,
|
|
"step": 5535,
|
|
"valid_targets_mean": 28888.1,
|
|
"valid_targets_min": 22096
|
|
},
|
|
{
|
|
"epoch": 5.912486659551761,
|
|
"grad_norm": 0.2069155375792718,
|
|
"learning_rate": 3.873358374078033e-05,
|
|
"loss": 0.405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40243616700172424,
|
|
"step": 5540,
|
|
"valid_targets_mean": 28860.0,
|
|
"valid_targets_min": 18914
|
|
},
|
|
{
|
|
"epoch": 5.9178228388473855,
|
|
"grad_norm": 0.21705205284159565,
|
|
"learning_rate": 3.854971968954436e-05,
|
|
"loss": 0.4043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.401405394077301,
|
|
"step": 5545,
|
|
"valid_targets_mean": 28765.5,
|
|
"valid_targets_min": 17484
|
|
},
|
|
{
|
|
"epoch": 5.92315901814301,
|
|
"grad_norm": 0.2415797773522953,
|
|
"learning_rate": 3.836618881203953e-05,
|
|
"loss": 0.4072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40842103958129883,
|
|
"step": 5550,
|
|
"valid_targets_mean": 28809.4,
|
|
"valid_targets_min": 21450
|
|
},
|
|
{
|
|
"epoch": 5.928495197438634,
|
|
"grad_norm": 0.265004048605028,
|
|
"learning_rate": 3.8182992103341355e-05,
|
|
"loss": 0.4034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40405726432800293,
|
|
"step": 5555,
|
|
"valid_targets_mean": 28818.4,
|
|
"valid_targets_min": 19185
|
|
},
|
|
{
|
|
"epoch": 5.9338313767342585,
|
|
"grad_norm": 0.21529807239273552,
|
|
"learning_rate": 3.800013055671343e-05,
|
|
"loss": 0.4034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40286242961883545,
|
|
"step": 5560,
|
|
"valid_targets_mean": 28765.6,
|
|
"valid_targets_min": 18225
|
|
},
|
|
{
|
|
"epoch": 5.939167556029883,
|
|
"grad_norm": 0.21081684843965076,
|
|
"learning_rate": 3.781760516360232e-05,
|
|
"loss": 0.4061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4038587212562561,
|
|
"step": 5565,
|
|
"valid_targets_mean": 28933.4,
|
|
"valid_targets_min": 20988
|
|
},
|
|
{
|
|
"epoch": 5.944503735325507,
|
|
"grad_norm": 0.21843522151704156,
|
|
"learning_rate": 3.7635416913631874e-05,
|
|
"loss": 0.4051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40666764974594116,
|
|
"step": 5570,
|
|
"valid_targets_mean": 28948.6,
|
|
"valid_targets_min": 18118
|
|
},
|
|
{
|
|
"epoch": 5.949839914621132,
|
|
"grad_norm": 0.2038687063842565,
|
|
"learning_rate": 3.745356679459803e-05,
|
|
"loss": 0.4054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4015902280807495,
|
|
"step": 5575,
|
|
"valid_targets_mean": 28850.7,
|
|
"valid_targets_min": 22683
|
|
},
|
|
{
|
|
"epoch": 5.955176093916756,
|
|
"grad_norm": 0.24391497724401612,
|
|
"learning_rate": 3.727205579246349e-05,
|
|
"loss": 0.4059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40471071004867554,
|
|
"step": 5580,
|
|
"valid_targets_mean": 29030.9,
|
|
"valid_targets_min": 19158
|
|
},
|
|
{
|
|
"epoch": 5.96051227321238,
|
|
"grad_norm": 0.27555184336682575,
|
|
"learning_rate": 3.70908848913523e-05,
|
|
"loss": 0.4048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40131568908691406,
|
|
"step": 5585,
|
|
"valid_targets_mean": 28900.0,
|
|
"valid_targets_min": 21973
|
|
},
|
|
{
|
|
"epoch": 5.965848452508004,
|
|
"grad_norm": 0.21948801292048828,
|
|
"learning_rate": 3.6910055073544545e-05,
|
|
"loss": 0.4061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4104491174221039,
|
|
"step": 5590,
|
|
"valid_targets_mean": 28758.2,
|
|
"valid_targets_min": 16583
|
|
},
|
|
{
|
|
"epoch": 5.971184631803629,
|
|
"grad_norm": 0.21597893371804786,
|
|
"learning_rate": 3.672956731947093e-05,
|
|
"loss": 0.4047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4032149910926819,
|
|
"step": 5595,
|
|
"valid_targets_mean": 28712.1,
|
|
"valid_targets_min": 18488
|
|
},
|
|
{
|
|
"epoch": 5.9765208110992525,
|
|
"grad_norm": 0.23374622037111217,
|
|
"learning_rate": 3.6549422607707664e-05,
|
|
"loss": 0.4056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4101329445838928,
|
|
"step": 5600,
|
|
"valid_targets_mean": 28952.7,
|
|
"valid_targets_min": 23291
|
|
},
|
|
{
|
|
"epoch": 5.981856990394878,
|
|
"grad_norm": 0.225947924540093,
|
|
"learning_rate": 3.636962191497089e-05,
|
|
"loss": 0.4066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4063754081726074,
|
|
"step": 5605,
|
|
"valid_targets_mean": 28869.9,
|
|
"valid_targets_min": 22004
|
|
},
|
|
{
|
|
"epoch": 5.987193169690501,
|
|
"grad_norm": 0.24990410081389533,
|
|
"learning_rate": 3.6190166216111666e-05,
|
|
"loss": 0.4047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4043414890766144,
|
|
"step": 5610,
|
|
"valid_targets_mean": 29032.0,
|
|
"valid_targets_min": 20042
|
|
},
|
|
{
|
|
"epoch": 5.9925293489861255,
|
|
"grad_norm": 0.23553235177171936,
|
|
"learning_rate": 3.6011056484110525e-05,
|
|
"loss": 0.405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40443944931030273,
|
|
"step": 5615,
|
|
"valid_targets_mean": 29001.4,
|
|
"valid_targets_min": 19151
|
|
},
|
|
{
|
|
"epoch": 5.99786552828175,
|
|
"grad_norm": 0.2054776217712033,
|
|
"learning_rate": 3.5832293690072125e-05,
|
|
"loss": 0.4049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4076521396636963,
|
|
"step": 5620,
|
|
"valid_targets_mean": 28850.6,
|
|
"valid_targets_min": 22676
|
|
},
|
|
{
|
|
"epoch": 6.003201707577374,
|
|
"grad_norm": 0.22868682159600212,
|
|
"learning_rate": 3.565387880322022e-05,
|
|
"loss": 0.4039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4057372808456421,
|
|
"step": 5625,
|
|
"valid_targets_mean": 28841.2,
|
|
"valid_targets_min": 22815
|
|
},
|
|
{
|
|
"epoch": 6.008537886872999,
|
|
"grad_norm": 0.21873814325197052,
|
|
"learning_rate": 3.547581279089216e-05,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4041280746459961,
|
|
"step": 5630,
|
|
"valid_targets_mean": 28757.2,
|
|
"valid_targets_min": 20584
|
|
},
|
|
{
|
|
"epoch": 6.013874066168623,
|
|
"grad_norm": 0.2037817066603688,
|
|
"learning_rate": 3.529809661853376e-05,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4051162898540497,
|
|
"step": 5635,
|
|
"valid_targets_mean": 28794.5,
|
|
"valid_targets_min": 22709
|
|
},
|
|
{
|
|
"epoch": 6.019210245464247,
|
|
"grad_norm": 0.21354179484688512,
|
|
"learning_rate": 3.5120731249694114e-05,
|
|
"loss": 0.4027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40277063846588135,
|
|
"step": 5640,
|
|
"valid_targets_mean": 28776.8,
|
|
"valid_targets_min": 19899
|
|
},
|
|
{
|
|
"epoch": 6.024546424759872,
|
|
"grad_norm": 0.21073033421592155,
|
|
"learning_rate": 3.494371764602029e-05,
|
|
"loss": 0.4038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4004727602005005,
|
|
"step": 5645,
|
|
"valid_targets_mean": 28738.6,
|
|
"valid_targets_min": 21317
|
|
},
|
|
{
|
|
"epoch": 6.029882604055496,
|
|
"grad_norm": 0.27975450593983187,
|
|
"learning_rate": 3.476705676725218e-05,
|
|
"loss": 0.4029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4047626852989197,
|
|
"step": 5650,
|
|
"valid_targets_mean": 28777.9,
|
|
"valid_targets_min": 22491
|
|
},
|
|
{
|
|
"epoch": 6.03521878335112,
|
|
"grad_norm": 0.2253042599562701,
|
|
"learning_rate": 3.459074957121713e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40415048599243164,
|
|
"step": 5655,
|
|
"valid_targets_mean": 28801.4,
|
|
"valid_targets_min": 20094
|
|
},
|
|
{
|
|
"epoch": 6.040554962646745,
|
|
"grad_norm": 0.23375326699273516,
|
|
"learning_rate": 3.441479701382504e-05,
|
|
"loss": 0.4011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39832621812820435,
|
|
"step": 5660,
|
|
"valid_targets_mean": 28750.8,
|
|
"valid_targets_min": 19771
|
|
},
|
|
{
|
|
"epoch": 6.045891141942369,
|
|
"grad_norm": 0.2198729246111239,
|
|
"learning_rate": 3.423920004906289e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40202754735946655,
|
|
"step": 5665,
|
|
"valid_targets_mean": 28771.3,
|
|
"valid_targets_min": 20397
|
|
},
|
|
{
|
|
"epoch": 6.0512273212379935,
|
|
"grad_norm": 0.22331278226059176,
|
|
"learning_rate": 3.4063959628989684e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40373921394348145,
|
|
"step": 5670,
|
|
"valid_targets_mean": 28709.1,
|
|
"valid_targets_min": 19664
|
|
},
|
|
{
|
|
"epoch": 6.056563500533618,
|
|
"grad_norm": 0.2094224833863894,
|
|
"learning_rate": 3.388907670373147e-05,
|
|
"loss": 0.4038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40180504322052,
|
|
"step": 5675,
|
|
"valid_targets_mean": 28815.6,
|
|
"valid_targets_min": 20582
|
|
},
|
|
{
|
|
"epoch": 6.061899679829242,
|
|
"grad_norm": 0.25422996974709433,
|
|
"learning_rate": 3.3714552221475805e-05,
|
|
"loss": 0.4035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4039395749568939,
|
|
"step": 5680,
|
|
"valid_targets_mean": 28961.4,
|
|
"valid_targets_min": 21772
|
|
},
|
|
{
|
|
"epoch": 6.0672358591248665,
|
|
"grad_norm": 0.19409749836193413,
|
|
"learning_rate": 3.354038712846698e-05,
|
|
"loss": 0.4039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40708667039871216,
|
|
"step": 5685,
|
|
"valid_targets_mean": 28967.0,
|
|
"valid_targets_min": 19766
|
|
},
|
|
{
|
|
"epoch": 6.072572038420491,
|
|
"grad_norm": 0.23052130057371095,
|
|
"learning_rate": 3.336658236900062e-05,
|
|
"loss": 0.4027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4013357162475586,
|
|
"step": 5690,
|
|
"valid_targets_mean": 28754.0,
|
|
"valid_targets_min": 21630
|
|
},
|
|
{
|
|
"epoch": 6.077908217716115,
|
|
"grad_norm": 0.2231430978668057,
|
|
"learning_rate": 3.3193138885418715e-05,
|
|
"loss": 0.4035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4056231379508972,
|
|
"step": 5695,
|
|
"valid_targets_mean": 28825.4,
|
|
"valid_targets_min": 21956
|
|
},
|
|
{
|
|
"epoch": 6.08324439701174,
|
|
"grad_norm": 0.21788060543120694,
|
|
"learning_rate": 3.302005761810453e-05,
|
|
"loss": 0.4045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4046787917613983,
|
|
"step": 5700,
|
|
"valid_targets_mean": 28996.1,
|
|
"valid_targets_min": 19864
|
|
},
|
|
{
|
|
"epoch": 6.088580576307364,
|
|
"grad_norm": 0.22295642951669387,
|
|
"learning_rate": 3.2847339505477374e-05,
|
|
"loss": 0.4016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4034949541091919,
|
|
"step": 5705,
|
|
"valid_targets_mean": 28886.7,
|
|
"valid_targets_min": 18510
|
|
},
|
|
{
|
|
"epoch": 6.093916755602988,
|
|
"grad_norm": 0.23010510678057203,
|
|
"learning_rate": 3.267498548398766e-05,
|
|
"loss": 0.4052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40143734216690063,
|
|
"step": 5710,
|
|
"valid_targets_mean": 28961.2,
|
|
"valid_targets_min": 19954
|
|
},
|
|
{
|
|
"epoch": 6.099252934898613,
|
|
"grad_norm": 0.20873053646578818,
|
|
"learning_rate": 3.250299648811168e-05,
|
|
"loss": 0.4023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40607360005378723,
|
|
"step": 5715,
|
|
"valid_targets_mean": 28922.8,
|
|
"valid_targets_min": 19932
|
|
},
|
|
{
|
|
"epoch": 6.104589114194237,
|
|
"grad_norm": 0.2083861463262958,
|
|
"learning_rate": 3.233137345034669e-05,
|
|
"loss": 0.4019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40308186411857605,
|
|
"step": 5720,
|
|
"valid_targets_mean": 28935.9,
|
|
"valid_targets_min": 21625
|
|
},
|
|
{
|
|
"epoch": 6.109925293489861,
|
|
"grad_norm": 0.2043950913043271,
|
|
"learning_rate": 3.2160117301205726e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40011361241340637,
|
|
"step": 5725,
|
|
"valid_targets_mean": 28786.4,
|
|
"valid_targets_min": 16026
|
|
},
|
|
{
|
|
"epoch": 6.115261472785486,
|
|
"grad_norm": 0.20300023613652335,
|
|
"learning_rate": 3.198922896921259e-05,
|
|
"loss": 0.4023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40243393182754517,
|
|
"step": 5730,
|
|
"valid_targets_mean": 28795.3,
|
|
"valid_targets_min": 21856
|
|
},
|
|
{
|
|
"epoch": 6.12059765208111,
|
|
"grad_norm": 0.20243128119834547,
|
|
"learning_rate": 3.1818709380896905e-05,
|
|
"loss": 0.4037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3996739983558655,
|
|
"step": 5735,
|
|
"valid_targets_mean": 28954.9,
|
|
"valid_targets_min": 20610
|
|
},
|
|
{
|
|
"epoch": 6.125933831376734,
|
|
"grad_norm": 0.20548029321838865,
|
|
"learning_rate": 3.164855946078899e-05,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40435925126075745,
|
|
"step": 5740,
|
|
"valid_targets_mean": 28726.2,
|
|
"valid_targets_min": 16033
|
|
},
|
|
{
|
|
"epoch": 6.131270010672359,
|
|
"grad_norm": 0.20211522984866082,
|
|
"learning_rate": 3.147878013141492e-05,
|
|
"loss": 0.4004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39486247301101685,
|
|
"step": 5745,
|
|
"valid_targets_mean": 29027.4,
|
|
"valid_targets_min": 23070
|
|
},
|
|
{
|
|
"epoch": 6.136606189967983,
|
|
"grad_norm": 0.2127912730791383,
|
|
"learning_rate": 3.130937231329142e-05,
|
|
"loss": 0.4037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40694287419319153,
|
|
"step": 5750,
|
|
"valid_targets_mean": 28807.9,
|
|
"valid_targets_min": 20486
|
|
},
|
|
{
|
|
"epoch": 6.1419423692636075,
|
|
"grad_norm": 0.20031145012395565,
|
|
"learning_rate": 3.114033692492093e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4004095196723938,
|
|
"step": 5755,
|
|
"valid_targets_mean": 28869.6,
|
|
"valid_targets_min": 20911
|
|
},
|
|
{
|
|
"epoch": 6.147278548559232,
|
|
"grad_norm": 0.20549511785676874,
|
|
"learning_rate": 3.097167488278672e-05,
|
|
"loss": 0.4024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4021293520927429,
|
|
"step": 5760,
|
|
"valid_targets_mean": 28929.9,
|
|
"valid_targets_min": 20613
|
|
},
|
|
{
|
|
"epoch": 6.152614727854856,
|
|
"grad_norm": 0.20927356784178391,
|
|
"learning_rate": 3.080338710134778e-05,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40633559226989746,
|
|
"step": 5765,
|
|
"valid_targets_mean": 29096.6,
|
|
"valid_targets_min": 21570
|
|
},
|
|
{
|
|
"epoch": 6.1579509071504805,
|
|
"grad_norm": 0.21312244449480242,
|
|
"learning_rate": 3.063547449303397e-05,
|
|
"loss": 0.4034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40091001987457275,
|
|
"step": 5770,
|
|
"valid_targets_mean": 28731.6,
|
|
"valid_targets_min": 21337
|
|
},
|
|
{
|
|
"epoch": 6.163287086446105,
|
|
"grad_norm": 0.21230355994256525,
|
|
"learning_rate": 3.04679379682409e-05,
|
|
"loss": 0.402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40138083696365356,
|
|
"step": 5775,
|
|
"valid_targets_mean": 28948.2,
|
|
"valid_targets_min": 23092
|
|
},
|
|
{
|
|
"epoch": 6.168623265741729,
|
|
"grad_norm": 0.1976015266522106,
|
|
"learning_rate": 3.030077843532526e-05,
|
|
"loss": 0.4027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40313124656677246,
|
|
"step": 5780,
|
|
"valid_targets_mean": 28983.9,
|
|
"valid_targets_min": 20284
|
|
},
|
|
{
|
|
"epoch": 6.173959445037354,
|
|
"grad_norm": 0.20487890681336945,
|
|
"learning_rate": 3.013399680059965e-05,
|
|
"loss": 0.4034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4016597867012024,
|
|
"step": 5785,
|
|
"valid_targets_mean": 28889.0,
|
|
"valid_targets_min": 18214
|
|
},
|
|
{
|
|
"epoch": 6.179295624332978,
|
|
"grad_norm": 0.19989947369680958,
|
|
"learning_rate": 2.996759396832778e-05,
|
|
"loss": 0.4011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40249523520469666,
|
|
"step": 5790,
|
|
"valid_targets_mean": 28888.5,
|
|
"valid_targets_min": 22390
|
|
},
|
|
{
|
|
"epoch": 6.184631803628602,
|
|
"grad_norm": 0.19919529330401847,
|
|
"learning_rate": 2.980157084071957e-05,
|
|
"loss": 0.4033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4005407691001892,
|
|
"step": 5795,
|
|
"valid_targets_mean": 28789.4,
|
|
"valid_targets_min": 20334
|
|
},
|
|
{
|
|
"epoch": 6.189967982924227,
|
|
"grad_norm": 0.2221574146814808,
|
|
"learning_rate": 2.9635928317926277e-05,
|
|
"loss": 0.4007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40457186102867126,
|
|
"step": 5800,
|
|
"valid_targets_mean": 28785.1,
|
|
"valid_targets_min": 17119
|
|
},
|
|
{
|
|
"epoch": 6.19530416221985,
|
|
"grad_norm": 0.1990827141497274,
|
|
"learning_rate": 2.9470667298035558e-05,
|
|
"loss": 0.4054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40230417251586914,
|
|
"step": 5805,
|
|
"valid_targets_mean": 28801.1,
|
|
"valid_targets_min": 20516
|
|
},
|
|
{
|
|
"epoch": 6.2006403415154745,
|
|
"grad_norm": 0.21931541168701757,
|
|
"learning_rate": 2.930578867706657e-05,
|
|
"loss": 0.4027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39906126260757446,
|
|
"step": 5810,
|
|
"valid_targets_mean": 28834.8,
|
|
"valid_targets_min": 20373
|
|
},
|
|
{
|
|
"epoch": 6.205976520811099,
|
|
"grad_norm": 0.19070110342281413,
|
|
"learning_rate": 2.9141293348965183e-05,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4032185971736908,
|
|
"step": 5815,
|
|
"valid_targets_mean": 28856.1,
|
|
"valid_targets_min": 19452
|
|
},
|
|
{
|
|
"epoch": 6.211312700106723,
|
|
"grad_norm": 0.2070312687844574,
|
|
"learning_rate": 2.8977182205599184e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40679287910461426,
|
|
"step": 5820,
|
|
"valid_targets_mean": 28810.0,
|
|
"valid_targets_min": 20350
|
|
},
|
|
{
|
|
"epoch": 6.216648879402348,
|
|
"grad_norm": 0.1973782077994425,
|
|
"learning_rate": 2.8813456136753213e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4033341407775879,
|
|
"step": 5825,
|
|
"valid_targets_mean": 28874.5,
|
|
"valid_targets_min": 18152
|
|
},
|
|
{
|
|
"epoch": 6.221985058697972,
|
|
"grad_norm": 0.2137726492190117,
|
|
"learning_rate": 2.8650116030124353e-05,
|
|
"loss": 0.4033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40437471866607666,
|
|
"step": 5830,
|
|
"valid_targets_mean": 28704.6,
|
|
"valid_targets_min": 22485
|
|
},
|
|
{
|
|
"epoch": 6.227321237993596,
|
|
"grad_norm": 0.2234305179964095,
|
|
"learning_rate": 2.8487162771316787e-05,
|
|
"loss": 0.4012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40185803174972534,
|
|
"step": 5835,
|
|
"valid_targets_mean": 28684.6,
|
|
"valid_targets_min": 21339
|
|
},
|
|
{
|
|
"epoch": 6.232657417289221,
|
|
"grad_norm": 0.18891512696399268,
|
|
"learning_rate": 2.832459724383748e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.400049090385437,
|
|
"step": 5840,
|
|
"valid_targets_mean": 28835.2,
|
|
"valid_targets_min": 20750
|
|
},
|
|
{
|
|
"epoch": 6.237993596584845,
|
|
"grad_norm": 0.1941215255944952,
|
|
"learning_rate": 2.8162420329091034e-05,
|
|
"loss": 0.4027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4041074812412262,
|
|
"step": 5845,
|
|
"valid_targets_mean": 28848.6,
|
|
"valid_targets_min": 21970
|
|
},
|
|
{
|
|
"epoch": 6.243329775880469,
|
|
"grad_norm": 0.20108340167380698,
|
|
"learning_rate": 2.80006329063751e-05,
|
|
"loss": 0.404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40438374876976013,
|
|
"step": 5850,
|
|
"valid_targets_mean": 28858.2,
|
|
"valid_targets_min": 22768
|
|
},
|
|
{
|
|
"epoch": 6.248665955176094,
|
|
"grad_norm": 0.18986968645064523,
|
|
"learning_rate": 2.783923585287559e-05,
|
|
"loss": 0.4016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40219926834106445,
|
|
"step": 5855,
|
|
"valid_targets_mean": 28752.1,
|
|
"valid_targets_min": 22450
|
|
},
|
|
{
|
|
"epoch": 6.254002134471718,
|
|
"grad_norm": 0.19131675041327859,
|
|
"learning_rate": 2.76782300436619e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40058252215385437,
|
|
"step": 5860,
|
|
"valid_targets_mean": 28910.0,
|
|
"valid_targets_min": 23690
|
|
},
|
|
{
|
|
"epoch": 6.259338313767342,
|
|
"grad_norm": 0.20587972668628907,
|
|
"learning_rate": 2.7517616351682153e-05,
|
|
"loss": 0.4038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40199992060661316,
|
|
"step": 5865,
|
|
"valid_targets_mean": 28825.8,
|
|
"valid_targets_min": 21820
|
|
},
|
|
{
|
|
"epoch": 6.264674493062967,
|
|
"grad_norm": 0.22498247411615535,
|
|
"learning_rate": 2.7357395647758454e-05,
|
|
"loss": 0.4044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4067252576351166,
|
|
"step": 5870,
|
|
"valid_targets_mean": 28796.7,
|
|
"valid_targets_min": 21420
|
|
},
|
|
{
|
|
"epoch": 6.270010672358591,
|
|
"grad_norm": 0.18859149173847364,
|
|
"learning_rate": 2.719756880058214e-05,
|
|
"loss": 0.4035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4027019441127777,
|
|
"step": 5875,
|
|
"valid_targets_mean": 28784.4,
|
|
"valid_targets_min": 18739
|
|
},
|
|
{
|
|
"epoch": 6.2753468516542155,
|
|
"grad_norm": 0.21404109929128848,
|
|
"learning_rate": 2.7038136676709268e-05,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40717530250549316,
|
|
"step": 5880,
|
|
"valid_targets_mean": 28924.2,
|
|
"valid_targets_min": 18971
|
|
},
|
|
{
|
|
"epoch": 6.28068303094984,
|
|
"grad_norm": 0.2130469328147773,
|
|
"learning_rate": 2.6879100140555592e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4079652428627014,
|
|
"step": 5885,
|
|
"valid_targets_mean": 28976.3,
|
|
"valid_targets_min": 22673
|
|
},
|
|
{
|
|
"epoch": 6.286019210245464,
|
|
"grad_norm": 0.2008718404415538,
|
|
"learning_rate": 2.6720460054392183e-05,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40473008155822754,
|
|
"step": 5890,
|
|
"valid_targets_mean": 28953.7,
|
|
"valid_targets_min": 20946
|
|
},
|
|
{
|
|
"epoch": 6.2913553895410885,
|
|
"grad_norm": 0.21367693351351572,
|
|
"learning_rate": 2.656221727834056e-05,
|
|
"loss": 0.4033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4048454165458679,
|
|
"step": 5895,
|
|
"valid_targets_mean": 28883.1,
|
|
"valid_targets_min": 22808
|
|
},
|
|
{
|
|
"epoch": 6.296691568836713,
|
|
"grad_norm": 0.18360734367399942,
|
|
"learning_rate": 2.640437267036815e-05,
|
|
"loss": 0.4015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40429389476776123,
|
|
"step": 5900,
|
|
"valid_targets_mean": 28847.2,
|
|
"valid_targets_min": 21654
|
|
},
|
|
{
|
|
"epoch": 6.302027748132337,
|
|
"grad_norm": 0.19376965246444747,
|
|
"learning_rate": 2.624692708628349e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40079379081726074,
|
|
"step": 5905,
|
|
"valid_targets_mean": 28974.9,
|
|
"valid_targets_min": 21683
|
|
},
|
|
{
|
|
"epoch": 6.307363927427962,
|
|
"grad_norm": 0.1980799701228368,
|
|
"learning_rate": 2.6089881379731684e-05,
|
|
"loss": 0.4037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40692567825317383,
|
|
"step": 5910,
|
|
"valid_targets_mean": 28900.9,
|
|
"valid_targets_min": 21402
|
|
},
|
|
{
|
|
"epoch": 6.312700106723586,
|
|
"grad_norm": 0.1805305637412513,
|
|
"learning_rate": 2.593323640218983e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.403741717338562,
|
|
"step": 5915,
|
|
"valid_targets_mean": 28839.7,
|
|
"valid_targets_min": 22345
|
|
},
|
|
{
|
|
"epoch": 6.31803628601921,
|
|
"grad_norm": 0.17499517264808107,
|
|
"learning_rate": 2.577699300296229e-05,
|
|
"loss": 0.402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4021400511264801,
|
|
"step": 5920,
|
|
"valid_targets_mean": 28891.9,
|
|
"valid_targets_min": 21526
|
|
},
|
|
{
|
|
"epoch": 6.323372465314835,
|
|
"grad_norm": 0.17937660832698205,
|
|
"learning_rate": 2.5621152029176175e-05,
|
|
"loss": 0.4013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4012629985809326,
|
|
"step": 5925,
|
|
"valid_targets_mean": 28952.7,
|
|
"valid_targets_min": 22270
|
|
},
|
|
{
|
|
"epoch": 6.328708644610459,
|
|
"grad_norm": 0.18444591108122746,
|
|
"learning_rate": 2.5465714325776645e-05,
|
|
"loss": 0.4028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40533414483070374,
|
|
"step": 5930,
|
|
"valid_targets_mean": 28918.4,
|
|
"valid_targets_min": 21858
|
|
},
|
|
{
|
|
"epoch": 6.334044823906083,
|
|
"grad_norm": 0.18379872762780242,
|
|
"learning_rate": 2.5310680735522385e-05,
|
|
"loss": 0.402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40175458788871765,
|
|
"step": 5935,
|
|
"valid_targets_mean": 28804.4,
|
|
"valid_targets_min": 19394
|
|
},
|
|
{
|
|
"epoch": 6.339381003201708,
|
|
"grad_norm": 0.18930997642920325,
|
|
"learning_rate": 2.5156052098981143e-05,
|
|
"loss": 0.4038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40313613414764404,
|
|
"step": 5940,
|
|
"valid_targets_mean": 28761.6,
|
|
"valid_targets_min": 19766
|
|
},
|
|
{
|
|
"epoch": 6.344717182497332,
|
|
"grad_norm": 0.1832631480960261,
|
|
"learning_rate": 2.5001829254524945e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40204566717147827,
|
|
"step": 5945,
|
|
"valid_targets_mean": 28999.8,
|
|
"valid_targets_min": 21177
|
|
},
|
|
{
|
|
"epoch": 6.350053361792956,
|
|
"grad_norm": 0.2273332984394366,
|
|
"learning_rate": 2.4848013038325756e-05,
|
|
"loss": 0.4039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40472328662872314,
|
|
"step": 5950,
|
|
"valid_targets_mean": 28942.7,
|
|
"valid_targets_min": 21101
|
|
},
|
|
{
|
|
"epoch": 6.355389541088581,
|
|
"grad_norm": 0.19163706801171704,
|
|
"learning_rate": 2.4694604284350875e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39981400966644287,
|
|
"step": 5955,
|
|
"valid_targets_mean": 28912.2,
|
|
"valid_targets_min": 18845
|
|
},
|
|
{
|
|
"epoch": 6.360725720384205,
|
|
"grad_norm": 0.19069414768695025,
|
|
"learning_rate": 2.4541603824358384e-05,
|
|
"loss": 0.4022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.402090847492218,
|
|
"step": 5960,
|
|
"valid_targets_mean": 28865.6,
|
|
"valid_targets_min": 23438
|
|
},
|
|
{
|
|
"epoch": 6.3660618996798295,
|
|
"grad_norm": 0.18179804752403267,
|
|
"learning_rate": 2.4389012487892627e-05,
|
|
"loss": 0.4015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40023428201675415,
|
|
"step": 5965,
|
|
"valid_targets_mean": 28759.3,
|
|
"valid_targets_min": 21201
|
|
},
|
|
{
|
|
"epoch": 6.371398078975454,
|
|
"grad_norm": 0.2148975593144794,
|
|
"learning_rate": 2.4236831102279756e-05,
|
|
"loss": 0.4022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40240776538848877,
|
|
"step": 5970,
|
|
"valid_targets_mean": 28966.0,
|
|
"valid_targets_min": 22745
|
|
},
|
|
{
|
|
"epoch": 6.376734258271078,
|
|
"grad_norm": 0.18490174473146967,
|
|
"learning_rate": 2.408506049262329e-05,
|
|
"loss": 0.4019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4022800028324127,
|
|
"step": 5975,
|
|
"valid_targets_mean": 28688.3,
|
|
"valid_targets_min": 21559
|
|
},
|
|
{
|
|
"epoch": 6.382070437566703,
|
|
"grad_norm": 0.1844255041375129,
|
|
"learning_rate": 2.3933701481799496e-05,
|
|
"loss": 0.401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40320301055908203,
|
|
"step": 5980,
|
|
"valid_targets_mean": 28579.6,
|
|
"valid_targets_min": 20348
|
|
},
|
|
{
|
|
"epoch": 6.387406616862327,
|
|
"grad_norm": 0.21357911835637616,
|
|
"learning_rate": 2.3782754890453106e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3996123671531677,
|
|
"step": 5985,
|
|
"valid_targets_mean": 28688.8,
|
|
"valid_targets_min": 19354
|
|
},
|
|
{
|
|
"epoch": 6.392742796157951,
|
|
"grad_norm": 0.196175539187502,
|
|
"learning_rate": 2.363222153699276e-05,
|
|
"loss": 0.405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4055750072002411,
|
|
"step": 5990,
|
|
"valid_targets_mean": 28719.9,
|
|
"valid_targets_min": 20713
|
|
},
|
|
{
|
|
"epoch": 6.398078975453576,
|
|
"grad_norm": 0.18380230078382345,
|
|
"learning_rate": 2.34821022375865e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4092828333377838,
|
|
"step": 5995,
|
|
"valid_targets_mean": 28915.8,
|
|
"valid_targets_min": 23482
|
|
},
|
|
{
|
|
"epoch": 6.4034151547492,
|
|
"grad_norm": 0.16559855043636187,
|
|
"learning_rate": 2.333239780615756e-05,
|
|
"loss": 0.4023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40028464794158936,
|
|
"step": 6000,
|
|
"valid_targets_mean": 28787.5,
|
|
"valid_targets_min": 21447
|
|
},
|
|
{
|
|
"epoch": 6.408751334044824,
|
|
"grad_norm": 0.19131444987514326,
|
|
"learning_rate": 2.3183109054379715e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40193384885787964,
|
|
"step": 6005,
|
|
"valid_targets_mean": 28890.1,
|
|
"valid_targets_min": 21725
|
|
},
|
|
{
|
|
"epoch": 6.414087513340448,
|
|
"grad_norm": 0.20081963856111104,
|
|
"learning_rate": 2.3034236791673058e-05,
|
|
"loss": 0.4012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40240931510925293,
|
|
"step": 6010,
|
|
"valid_targets_mean": 28768.8,
|
|
"valid_targets_min": 21470
|
|
},
|
|
{
|
|
"epoch": 6.419423692636073,
|
|
"grad_norm": 0.19922149504539194,
|
|
"learning_rate": 2.2885781825199526e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4008885622024536,
|
|
"step": 6015,
|
|
"valid_targets_mean": 28945.8,
|
|
"valid_targets_min": 22275
|
|
},
|
|
{
|
|
"epoch": 6.4247598719316965,
|
|
"grad_norm": 0.19131542925233516,
|
|
"learning_rate": 2.2737744959858577e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3999541401863098,
|
|
"step": 6020,
|
|
"valid_targets_mean": 28794.4,
|
|
"valid_targets_min": 21997
|
|
},
|
|
{
|
|
"epoch": 6.430096051227321,
|
|
"grad_norm": 0.19508531274083823,
|
|
"learning_rate": 2.2590126998282725e-05,
|
|
"loss": 0.4027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4026764929294586,
|
|
"step": 6025,
|
|
"valid_targets_mean": 28815.1,
|
|
"valid_targets_min": 20115
|
|
},
|
|
{
|
|
"epoch": 6.435432230522945,
|
|
"grad_norm": 0.1817251897940026,
|
|
"learning_rate": 2.2442928740833245e-05,
|
|
"loss": 0.4034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4037598669528961,
|
|
"step": 6030,
|
|
"valid_targets_mean": 28978.1,
|
|
"valid_targets_min": 22243
|
|
},
|
|
{
|
|
"epoch": 6.44076840981857,
|
|
"grad_norm": 0.1877156586444744,
|
|
"learning_rate": 2.229615098559594e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4073331356048584,
|
|
"step": 6035,
|
|
"valid_targets_mean": 28778.0,
|
|
"valid_targets_min": 21732
|
|
},
|
|
{
|
|
"epoch": 6.446104589114194,
|
|
"grad_norm": 0.1883960030591332,
|
|
"learning_rate": 2.214979452837661e-05,
|
|
"loss": 0.4016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39945822954177856,
|
|
"step": 6040,
|
|
"valid_targets_mean": 28708.4,
|
|
"valid_targets_min": 22806
|
|
},
|
|
{
|
|
"epoch": 6.451440768409818,
|
|
"grad_norm": 0.18792510830856934,
|
|
"learning_rate": 2.200386016269691e-05,
|
|
"loss": 0.4027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40260523557662964,
|
|
"step": 6045,
|
|
"valid_targets_mean": 28852.8,
|
|
"valid_targets_min": 23161
|
|
},
|
|
{
|
|
"epoch": 6.456776947705443,
|
|
"grad_norm": 0.18046002723349452,
|
|
"learning_rate": 2.1858348679789987e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40115612745285034,
|
|
"step": 6050,
|
|
"valid_targets_mean": 28744.5,
|
|
"valid_targets_min": 17458
|
|
},
|
|
{
|
|
"epoch": 6.462113127001067,
|
|
"grad_norm": 0.19326827523187853,
|
|
"learning_rate": 2.171326086859612e-05,
|
|
"loss": 0.4016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4018067717552185,
|
|
"step": 6055,
|
|
"valid_targets_mean": 28825.2,
|
|
"valid_targets_min": 20330
|
|
},
|
|
{
|
|
"epoch": 6.467449306296691,
|
|
"grad_norm": 0.18187928385392707,
|
|
"learning_rate": 2.1568597515758572e-05,
|
|
"loss": 0.4033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3992971181869507,
|
|
"step": 6060,
|
|
"valid_targets_mean": 28750.9,
|
|
"valid_targets_min": 22831
|
|
},
|
|
{
|
|
"epoch": 6.472785485592316,
|
|
"grad_norm": 0.18571117091706152,
|
|
"learning_rate": 2.142435940561921e-05,
|
|
"loss": 0.4026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40224558115005493,
|
|
"step": 6065,
|
|
"valid_targets_mean": 28840.0,
|
|
"valid_targets_min": 23005
|
|
},
|
|
{
|
|
"epoch": 6.47812166488794,
|
|
"grad_norm": 0.19856471998068015,
|
|
"learning_rate": 2.128054732021435e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4016802906990051,
|
|
"step": 6070,
|
|
"valid_targets_mean": 28950.6,
|
|
"valid_targets_min": 21925
|
|
},
|
|
{
|
|
"epoch": 6.483457844183564,
|
|
"grad_norm": 0.18011864362954913,
|
|
"learning_rate": 2.1137162039270386e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40601515769958496,
|
|
"step": 6075,
|
|
"valid_targets_mean": 28755.4,
|
|
"valid_targets_min": 20836
|
|
},
|
|
{
|
|
"epoch": 6.488794023479189,
|
|
"grad_norm": 0.1754307871285436,
|
|
"learning_rate": 2.0994204340199806e-05,
|
|
"loss": 0.402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40100088715553284,
|
|
"step": 6080,
|
|
"valid_targets_mean": 28894.5,
|
|
"valid_targets_min": 21107
|
|
},
|
|
{
|
|
"epoch": 6.494130202774813,
|
|
"grad_norm": 0.18780028341035576,
|
|
"learning_rate": 2.085167499809666e-05,
|
|
"loss": 0.4009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40386563539505005,
|
|
"step": 6085,
|
|
"valid_targets_mean": 28856.9,
|
|
"valid_targets_min": 20525
|
|
},
|
|
{
|
|
"epoch": 6.4994663820704375,
|
|
"grad_norm": 0.17939591035162056,
|
|
"learning_rate": 2.0709574785732544e-05,
|
|
"loss": 0.4043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40500178933143616,
|
|
"step": 6090,
|
|
"valid_targets_mean": 28813.9,
|
|
"valid_targets_min": 22234
|
|
},
|
|
{
|
|
"epoch": 6.504802561366062,
|
|
"grad_norm": 0.19077799045157887,
|
|
"learning_rate": 2.0567904473552446e-05,
|
|
"loss": 0.4023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4002484083175659,
|
|
"step": 6095,
|
|
"valid_targets_mean": 28850.9,
|
|
"valid_targets_min": 21860
|
|
},
|
|
{
|
|
"epoch": 6.510138740661686,
|
|
"grad_norm": 0.1897438513167475,
|
|
"learning_rate": 2.0426664829670415e-05,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.399167537689209,
|
|
"step": 6100,
|
|
"valid_targets_mean": 29116.8,
|
|
"valid_targets_min": 23393
|
|
},
|
|
{
|
|
"epoch": 6.5154749199573105,
|
|
"grad_norm": 0.19359987172136103,
|
|
"learning_rate": 2.0285856619865573e-05,
|
|
"loss": 0.3993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4001930356025696,
|
|
"step": 6105,
|
|
"valid_targets_mean": 28885.3,
|
|
"valid_targets_min": 22689
|
|
},
|
|
{
|
|
"epoch": 6.520811099252935,
|
|
"grad_norm": 0.1837096087171172,
|
|
"learning_rate": 2.014548060757785e-05,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4038437306880951,
|
|
"step": 6110,
|
|
"valid_targets_mean": 28748.8,
|
|
"valid_targets_min": 17902
|
|
},
|
|
{
|
|
"epoch": 6.526147278548559,
|
|
"grad_norm": 0.17583615950748377,
|
|
"learning_rate": 2.0005537553903787e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40059417486190796,
|
|
"step": 6115,
|
|
"valid_targets_mean": 28806.0,
|
|
"valid_targets_min": 17871
|
|
},
|
|
{
|
|
"epoch": 6.531483457844184,
|
|
"grad_norm": 0.1817073021255667,
|
|
"learning_rate": 1.9866028217592612e-05,
|
|
"loss": 0.4016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3994242548942566,
|
|
"step": 6120,
|
|
"valid_targets_mean": 28856.7,
|
|
"valid_targets_min": 19786
|
|
},
|
|
{
|
|
"epoch": 6.536819637139808,
|
|
"grad_norm": 0.18507873365948924,
|
|
"learning_rate": 1.972695335504192e-05,
|
|
"loss": 0.4025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3999413251876831,
|
|
"step": 6125,
|
|
"valid_targets_mean": 28968.7,
|
|
"valid_targets_min": 23619
|
|
},
|
|
{
|
|
"epoch": 6.542155816435432,
|
|
"grad_norm": 0.17834966809846056,
|
|
"learning_rate": 1.9588313720293717e-05,
|
|
"loss": 0.4029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40515387058258057,
|
|
"step": 6130,
|
|
"valid_targets_mean": 28770.1,
|
|
"valid_targets_min": 19698
|
|
},
|
|
{
|
|
"epoch": 6.547491995731057,
|
|
"grad_norm": 0.17179000860676627,
|
|
"learning_rate": 1.9450110065030202e-05,
|
|
"loss": 0.4008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3992699980735779,
|
|
"step": 6135,
|
|
"valid_targets_mean": 28938.3,
|
|
"valid_targets_min": 16427
|
|
},
|
|
{
|
|
"epoch": 6.552828175026681,
|
|
"grad_norm": 0.18437786009110965,
|
|
"learning_rate": 1.9312343138569822e-05,
|
|
"loss": 0.4024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.400585412979126,
|
|
"step": 6140,
|
|
"valid_targets_mean": 28727.0,
|
|
"valid_targets_min": 20169
|
|
},
|
|
{
|
|
"epoch": 6.558164354322305,
|
|
"grad_norm": 0.19252564397052424,
|
|
"learning_rate": 1.9175013687863165e-05,
|
|
"loss": 0.4007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40095585584640503,
|
|
"step": 6145,
|
|
"valid_targets_mean": 28912.5,
|
|
"valid_targets_min": 21936
|
|
},
|
|
{
|
|
"epoch": 6.56350053361793,
|
|
"grad_norm": 0.1816462272421975,
|
|
"learning_rate": 1.9038122457488804e-05,
|
|
"loss": 0.4019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39897194504737854,
|
|
"step": 6150,
|
|
"valid_targets_mean": 28958.5,
|
|
"valid_targets_min": 22794
|
|
},
|
|
{
|
|
"epoch": 6.568836712913554,
|
|
"grad_norm": 0.18775048667792404,
|
|
"learning_rate": 1.8901670189649457e-05,
|
|
"loss": 0.4015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40145641565322876,
|
|
"step": 6155,
|
|
"valid_targets_mean": 29103.2,
|
|
"valid_targets_min": 22797
|
|
},
|
|
{
|
|
"epoch": 6.574172892209178,
|
|
"grad_norm": 0.19158581227718158,
|
|
"learning_rate": 1.8765657624167765e-05,
|
|
"loss": 0.4034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40386325120925903,
|
|
"step": 6160,
|
|
"valid_targets_mean": 28646.2,
|
|
"valid_targets_min": 22585
|
|
},
|
|
{
|
|
"epoch": 6.579509071504803,
|
|
"grad_norm": 0.18156089305735862,
|
|
"learning_rate": 1.863008549848243e-05,
|
|
"loss": 0.4015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4037448763847351,
|
|
"step": 6165,
|
|
"valid_targets_mean": 28938.0,
|
|
"valid_targets_min": 21285
|
|
},
|
|
{
|
|
"epoch": 6.584845250800427,
|
|
"grad_norm": 0.19574581682266376,
|
|
"learning_rate": 1.849495454764416e-05,
|
|
"loss": 0.4013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39787396788597107,
|
|
"step": 6170,
|
|
"valid_targets_mean": 28945.8,
|
|
"valid_targets_min": 19712
|
|
},
|
|
{
|
|
"epoch": 6.5901814300960515,
|
|
"grad_norm": 0.18243929360702177,
|
|
"learning_rate": 1.8360265504311635e-05,
|
|
"loss": 0.402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40304043889045715,
|
|
"step": 6175,
|
|
"valid_targets_mean": 28608.3,
|
|
"valid_targets_min": 20646
|
|
},
|
|
{
|
|
"epoch": 6.595517609391676,
|
|
"grad_norm": 0.18102304076902928,
|
|
"learning_rate": 1.822601909874765e-05,
|
|
"loss": 0.4025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40229958295822144,
|
|
"step": 6180,
|
|
"valid_targets_mean": 28912.8,
|
|
"valid_targets_min": 22071
|
|
},
|
|
{
|
|
"epoch": 6.6008537886873,
|
|
"grad_norm": 0.18009646165533064,
|
|
"learning_rate": 1.8092216058814993e-05,
|
|
"loss": 0.4013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4023338556289673,
|
|
"step": 6185,
|
|
"valid_targets_mean": 28818.4,
|
|
"valid_targets_min": 19823
|
|
},
|
|
{
|
|
"epoch": 6.606189967982925,
|
|
"grad_norm": 0.17824539351505556,
|
|
"learning_rate": 1.7958857109972714e-05,
|
|
"loss": 0.4007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39848190546035767,
|
|
"step": 6190,
|
|
"valid_targets_mean": 28675.4,
|
|
"valid_targets_min": 21687
|
|
},
|
|
{
|
|
"epoch": 6.611526147278549,
|
|
"grad_norm": 0.17141496286854313,
|
|
"learning_rate": 1.7825942975271946e-05,
|
|
"loss": 0.4015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39964133501052856,
|
|
"step": 6195,
|
|
"valid_targets_mean": 28842.7,
|
|
"valid_targets_min": 22278
|
|
},
|
|
{
|
|
"epoch": 6.616862326574173,
|
|
"grad_norm": 0.18120109301172674,
|
|
"learning_rate": 1.7693474375352193e-05,
|
|
"loss": 0.4014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40147751569747925,
|
|
"step": 6200,
|
|
"valid_targets_mean": 28814.5,
|
|
"valid_targets_min": 22127
|
|
},
|
|
{
|
|
"epoch": 6.622198505869797,
|
|
"grad_norm": 0.18343171990938137,
|
|
"learning_rate": 1.7561452028437362e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40402135252952576,
|
|
"step": 6205,
|
|
"valid_targets_mean": 28789.7,
|
|
"valid_targets_min": 19184
|
|
},
|
|
{
|
|
"epoch": 6.627534685165422,
|
|
"grad_norm": 0.1617202995515404,
|
|
"learning_rate": 1.7429876650331732e-05,
|
|
"loss": 0.402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.399649441242218,
|
|
"step": 6210,
|
|
"valid_targets_mean": 28866.6,
|
|
"valid_targets_min": 23176
|
|
},
|
|
{
|
|
"epoch": 6.6328708644610455,
|
|
"grad_norm": 0.18035454442476903,
|
|
"learning_rate": 1.7298748954416334e-05,
|
|
"loss": 0.4019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.406667023897171,
|
|
"step": 6215,
|
|
"valid_targets_mean": 28882.5,
|
|
"valid_targets_min": 22635
|
|
},
|
|
{
|
|
"epoch": 6.638207043756671,
|
|
"grad_norm": 0.16518628892225848,
|
|
"learning_rate": 1.716806965164477e-05,
|
|
"loss": 0.4004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4044821858406067,
|
|
"step": 6220,
|
|
"valid_targets_mean": 28862.5,
|
|
"valid_targets_min": 20417
|
|
},
|
|
{
|
|
"epoch": 6.643543223052294,
|
|
"grad_norm": 0.17373762749649188,
|
|
"learning_rate": 1.7037839450539706e-05,
|
|
"loss": 0.3983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39518213272094727,
|
|
"step": 6225,
|
|
"valid_targets_mean": 28887.1,
|
|
"valid_targets_min": 16333
|
|
},
|
|
{
|
|
"epoch": 6.6488794023479185,
|
|
"grad_norm": 0.1766409075861455,
|
|
"learning_rate": 1.6908059057188686e-05,
|
|
"loss": 0.4032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40541961789131165,
|
|
"step": 6230,
|
|
"valid_targets_mean": 28740.7,
|
|
"valid_targets_min": 19735
|
|
},
|
|
{
|
|
"epoch": 6.654215581643543,
|
|
"grad_norm": 0.1968366034578871,
|
|
"learning_rate": 1.677872917524057e-05,
|
|
"loss": 0.3994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4014188051223755,
|
|
"step": 6235,
|
|
"valid_targets_mean": 28856.3,
|
|
"valid_targets_min": 20610
|
|
},
|
|
{
|
|
"epoch": 6.659551760939167,
|
|
"grad_norm": 0.18237048405103176,
|
|
"learning_rate": 1.664985050590162e-05,
|
|
"loss": 0.4022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40323466062545776,
|
|
"step": 6240,
|
|
"valid_targets_mean": 28779.6,
|
|
"valid_targets_min": 20577
|
|
},
|
|
{
|
|
"epoch": 6.664887940234792,
|
|
"grad_norm": 0.2157169595866924,
|
|
"learning_rate": 1.6521423747931597e-05,
|
|
"loss": 0.4026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39827534556388855,
|
|
"step": 6245,
|
|
"valid_targets_mean": 28740.5,
|
|
"valid_targets_min": 21039
|
|
},
|
|
{
|
|
"epoch": 6.670224119530416,
|
|
"grad_norm": 0.16378620466568197,
|
|
"learning_rate": 1.63934495976402e-05,
|
|
"loss": 0.4024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40637755393981934,
|
|
"step": 6250,
|
|
"valid_targets_mean": 28883.8,
|
|
"valid_targets_min": 20594
|
|
},
|
|
{
|
|
"epoch": 6.67556029882604,
|
|
"grad_norm": 0.19308134301794055,
|
|
"learning_rate": 1.626592874888303e-05,
|
|
"loss": 0.4036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4051148295402527,
|
|
"step": 6255,
|
|
"valid_targets_mean": 28807.3,
|
|
"valid_targets_min": 22866
|
|
},
|
|
{
|
|
"epoch": 6.680896478121665,
|
|
"grad_norm": 0.15946375328063236,
|
|
"learning_rate": 1.6138861893058067e-05,
|
|
"loss": 0.4029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4002209007740021,
|
|
"step": 6260,
|
|
"valid_targets_mean": 28840.7,
|
|
"valid_targets_min": 17736
|
|
},
|
|
{
|
|
"epoch": 6.686232657417289,
|
|
"grad_norm": 0.1631580545072671,
|
|
"learning_rate": 1.601224971910177e-05,
|
|
"loss": 0.4023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40180450677871704,
|
|
"step": 6265,
|
|
"valid_targets_mean": 28697.5,
|
|
"valid_targets_min": 17344
|
|
},
|
|
{
|
|
"epoch": 6.691568836712913,
|
|
"grad_norm": 0.18043079602468606,
|
|
"learning_rate": 1.588609291348535e-05,
|
|
"loss": 0.401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.401018887758255,
|
|
"step": 6270,
|
|
"valid_targets_mean": 28887.6,
|
|
"valid_targets_min": 20293
|
|
},
|
|
{
|
|
"epoch": 6.696905016008538,
|
|
"grad_norm": 0.19874888578672376,
|
|
"learning_rate": 1.5760392160211156e-05,
|
|
"loss": 0.4013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3999865651130676,
|
|
"step": 6275,
|
|
"valid_targets_mean": 28792.9,
|
|
"valid_targets_min": 21576
|
|
},
|
|
{
|
|
"epoch": 6.702241195304162,
|
|
"grad_norm": 0.17033556307489203,
|
|
"learning_rate": 1.563514814080881e-05,
|
|
"loss": 0.4004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4025978744029999,
|
|
"step": 6280,
|
|
"valid_targets_mean": 28552.2,
|
|
"valid_targets_min": 21530
|
|
},
|
|
{
|
|
"epoch": 6.707577374599786,
|
|
"grad_norm": 0.18220728792481394,
|
|
"learning_rate": 1.5510361534331686e-05,
|
|
"loss": 0.4022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4051911234855652,
|
|
"step": 6285,
|
|
"valid_targets_mean": 28988.1,
|
|
"valid_targets_min": 19604
|
|
},
|
|
{
|
|
"epoch": 6.712913553895411,
|
|
"grad_norm": 0.1979186351056776,
|
|
"learning_rate": 1.538603301735305e-05,
|
|
"loss": 0.4006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3975885510444641,
|
|
"step": 6290,
|
|
"valid_targets_mean": 29006.6,
|
|
"valid_targets_min": 20821
|
|
},
|
|
{
|
|
"epoch": 6.718249733191035,
|
|
"grad_norm": 0.18776718169818407,
|
|
"learning_rate": 1.5262163263962527e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4064946174621582,
|
|
"step": 6295,
|
|
"valid_targets_mean": 28904.0,
|
|
"valid_targets_min": 22811
|
|
},
|
|
{
|
|
"epoch": 6.7235859124866595,
|
|
"grad_norm": 0.18165907356672129,
|
|
"learning_rate": 1.5138752945762425e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40303996205329895,
|
|
"step": 6300,
|
|
"valid_targets_mean": 28870.6,
|
|
"valid_targets_min": 20970
|
|
},
|
|
{
|
|
"epoch": 6.728922091782284,
|
|
"grad_norm": 0.17790068533186368,
|
|
"learning_rate": 1.5015802731863981e-05,
|
|
"loss": 0.3988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39912688732147217,
|
|
"step": 6305,
|
|
"valid_targets_mean": 28779.1,
|
|
"valid_targets_min": 20784
|
|
},
|
|
{
|
|
"epoch": 6.734258271077908,
|
|
"grad_norm": 0.16495901813636496,
|
|
"learning_rate": 1.4893313288883915e-05,
|
|
"loss": 0.401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4006229043006897,
|
|
"step": 6310,
|
|
"valid_targets_mean": 28860.0,
|
|
"valid_targets_min": 18795
|
|
},
|
|
{
|
|
"epoch": 6.7395944503735326,
|
|
"grad_norm": 0.16451303664483405,
|
|
"learning_rate": 1.4771285280940616e-05,
|
|
"loss": 0.4005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40138137340545654,
|
|
"step": 6315,
|
|
"valid_targets_mean": 28857.6,
|
|
"valid_targets_min": 19825
|
|
},
|
|
{
|
|
"epoch": 6.744930629669157,
|
|
"grad_norm": 0.17086287887312568,
|
|
"learning_rate": 1.4649719369650772e-05,
|
|
"loss": 0.3999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40100106596946716,
|
|
"step": 6320,
|
|
"valid_targets_mean": 28765.1,
|
|
"valid_targets_min": 18374
|
|
},
|
|
{
|
|
"epoch": 6.750266808964781,
|
|
"grad_norm": 0.16714376991812513,
|
|
"learning_rate": 1.4528616214125535e-05,
|
|
"loss": 0.4019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4033486247062683,
|
|
"step": 6325,
|
|
"valid_targets_mean": 28851.5,
|
|
"valid_targets_min": 21665
|
|
},
|
|
{
|
|
"epoch": 6.755602988260406,
|
|
"grad_norm": 0.16704078585302812,
|
|
"learning_rate": 1.4407976470967133e-05,
|
|
"loss": 0.4025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40259772539138794,
|
|
"step": 6330,
|
|
"valid_targets_mean": 28977.0,
|
|
"valid_targets_min": 21840
|
|
},
|
|
{
|
|
"epoch": 6.76093916755603,
|
|
"grad_norm": 0.16866547396425438,
|
|
"learning_rate": 1.428780079426526e-05,
|
|
"loss": 0.4026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4046093225479126,
|
|
"step": 6335,
|
|
"valid_targets_mean": 28653.1,
|
|
"valid_targets_min": 18063
|
|
},
|
|
{
|
|
"epoch": 6.766275346851654,
|
|
"grad_norm": 0.16086409262800527,
|
|
"learning_rate": 1.4168089835593446e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40628403425216675,
|
|
"step": 6340,
|
|
"valid_targets_mean": 28960.2,
|
|
"valid_targets_min": 20183
|
|
},
|
|
{
|
|
"epoch": 6.771611526147279,
|
|
"grad_norm": 0.1869500620775073,
|
|
"learning_rate": 1.4048844244005666e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4005264341831207,
|
|
"step": 6345,
|
|
"valid_targets_mean": 28785.8,
|
|
"valid_targets_min": 23578
|
|
},
|
|
{
|
|
"epoch": 6.776947705442903,
|
|
"grad_norm": 0.1697742455933057,
|
|
"learning_rate": 1.3930064666032693e-05,
|
|
"loss": 0.4016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4072091579437256,
|
|
"step": 6350,
|
|
"valid_targets_mean": 28691.9,
|
|
"valid_targets_min": 22316
|
|
},
|
|
{
|
|
"epoch": 6.782283884738527,
|
|
"grad_norm": 0.16988728925311614,
|
|
"learning_rate": 1.381175174567868e-05,
|
|
"loss": 0.4037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4012793302536011,
|
|
"step": 6355,
|
|
"valid_targets_mean": 28903.2,
|
|
"valid_targets_min": 22100
|
|
},
|
|
{
|
|
"epoch": 6.787620064034152,
|
|
"grad_norm": 0.1642169911188869,
|
|
"learning_rate": 1.3693906124417655e-05,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4029124975204468,
|
|
"step": 6360,
|
|
"valid_targets_mean": 28804.8,
|
|
"valid_targets_min": 22231
|
|
},
|
|
{
|
|
"epoch": 6.792956243329776,
|
|
"grad_norm": 0.18018467005146654,
|
|
"learning_rate": 1.357652844118994e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40579214692115784,
|
|
"step": 6365,
|
|
"valid_targets_mean": 28713.4,
|
|
"valid_targets_min": 19188
|
|
},
|
|
{
|
|
"epoch": 6.7982924226254005,
|
|
"grad_norm": 0.18591848673587488,
|
|
"learning_rate": 1.3459619332398887e-05,
|
|
"loss": 0.4025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4033072888851166,
|
|
"step": 6370,
|
|
"valid_targets_mean": 28754.1,
|
|
"valid_targets_min": 20035
|
|
},
|
|
{
|
|
"epoch": 6.803628601921025,
|
|
"grad_norm": 0.14922686750987793,
|
|
"learning_rate": 1.3343179431907193e-05,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3995817303657532,
|
|
"step": 6375,
|
|
"valid_targets_mean": 28785.5,
|
|
"valid_targets_min": 21644
|
|
},
|
|
{
|
|
"epoch": 6.808964781216649,
|
|
"grad_norm": 0.16633454388559174,
|
|
"learning_rate": 1.32272093710337e-05,
|
|
"loss": 0.4,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3998081088066101,
|
|
"step": 6380,
|
|
"valid_targets_mean": 28761.7,
|
|
"valid_targets_min": 21351
|
|
},
|
|
{
|
|
"epoch": 6.8143009605122735,
|
|
"grad_norm": 0.1744511950355698,
|
|
"learning_rate": 1.311170977854973e-05,
|
|
"loss": 0.4039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40507781505584717,
|
|
"step": 6385,
|
|
"valid_targets_mean": 28851.4,
|
|
"valid_targets_min": 21563
|
|
},
|
|
{
|
|
"epoch": 6.819637139807898,
|
|
"grad_norm": 0.16895744034595586,
|
|
"learning_rate": 1.2996681280675894e-05,
|
|
"loss": 0.4015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40257859230041504,
|
|
"step": 6390,
|
|
"valid_targets_mean": 28739.7,
|
|
"valid_targets_min": 19245
|
|
},
|
|
{
|
|
"epoch": 6.824973319103522,
|
|
"grad_norm": 0.177459104380991,
|
|
"learning_rate": 1.2882124501078597e-05,
|
|
"loss": 0.4001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39968159794807434,
|
|
"step": 6395,
|
|
"valid_targets_mean": 28733.4,
|
|
"valid_targets_min": 20661
|
|
},
|
|
{
|
|
"epoch": 6.830309498399147,
|
|
"grad_norm": 0.18238957188441435,
|
|
"learning_rate": 1.2768040060866593e-05,
|
|
"loss": 0.4002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4020434021949768,
|
|
"step": 6400,
|
|
"valid_targets_mean": 28898.1,
|
|
"valid_targets_min": 22863
|
|
},
|
|
{
|
|
"epoch": 6.835645677694771,
|
|
"grad_norm": 0.15973146615703437,
|
|
"learning_rate": 1.2654428578587763e-05,
|
|
"loss": 0.4012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40105491876602173,
|
|
"step": 6405,
|
|
"valid_targets_mean": 28873.5,
|
|
"valid_targets_min": 22667
|
|
},
|
|
{
|
|
"epoch": 6.840981856990394,
|
|
"grad_norm": 0.1798151031695525,
|
|
"learning_rate": 1.2541290670225647e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4018270969390869,
|
|
"step": 6410,
|
|
"valid_targets_mean": 28675.7,
|
|
"valid_targets_min": 22289
|
|
},
|
|
{
|
|
"epoch": 6.84631803628602,
|
|
"grad_norm": 0.16448721303729166,
|
|
"learning_rate": 1.2428626949196076e-05,
|
|
"loss": 0.4013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4013287425041199,
|
|
"step": 6415,
|
|
"valid_targets_mean": 28779.9,
|
|
"valid_targets_min": 18744
|
|
},
|
|
{
|
|
"epoch": 6.851654215581643,
|
|
"grad_norm": 0.17415414751858463,
|
|
"learning_rate": 1.231643802634409e-05,
|
|
"loss": 0.4001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40349093079566956,
|
|
"step": 6420,
|
|
"valid_targets_mean": 28927.2,
|
|
"valid_targets_min": 20736
|
|
},
|
|
{
|
|
"epoch": 6.856990394877268,
|
|
"grad_norm": 0.16319867986700945,
|
|
"learning_rate": 1.220472450994027e-05,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3997553586959839,
|
|
"step": 6425,
|
|
"valid_targets_mean": 28938.6,
|
|
"valid_targets_min": 22659
|
|
},
|
|
{
|
|
"epoch": 6.862326574172892,
|
|
"grad_norm": 0.16461669022500233,
|
|
"learning_rate": 1.2093487005677728e-05,
|
|
"loss": 0.3989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3953731656074524,
|
|
"step": 6430,
|
|
"valid_targets_mean": 28847.4,
|
|
"valid_targets_min": 22060
|
|
},
|
|
{
|
|
"epoch": 6.867662753468516,
|
|
"grad_norm": 0.1724566284023691,
|
|
"learning_rate": 1.1982726116668642e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4027242660522461,
|
|
"step": 6435,
|
|
"valid_targets_mean": 28751.4,
|
|
"valid_targets_min": 21767
|
|
},
|
|
{
|
|
"epoch": 6.8729989327641405,
|
|
"grad_norm": 0.16308302612711748,
|
|
"learning_rate": 1.1872442443441113e-05,
|
|
"loss": 0.4006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39664605259895325,
|
|
"step": 6440,
|
|
"valid_targets_mean": 28841.8,
|
|
"valid_targets_min": 22914
|
|
},
|
|
{
|
|
"epoch": 6.878335112059765,
|
|
"grad_norm": 0.1482871997880307,
|
|
"learning_rate": 1.1762636583935793e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40228116512298584,
|
|
"step": 6445,
|
|
"valid_targets_mean": 28875.2,
|
|
"valid_targets_min": 20679
|
|
},
|
|
{
|
|
"epoch": 6.883671291355389,
|
|
"grad_norm": 0.1685029515968394,
|
|
"learning_rate": 1.1653309133502777e-05,
|
|
"loss": 0.4011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40284842252731323,
|
|
"step": 6450,
|
|
"valid_targets_mean": 28918.3,
|
|
"valid_targets_min": 19134
|
|
},
|
|
{
|
|
"epoch": 6.889007470651014,
|
|
"grad_norm": 0.15905665248843048,
|
|
"learning_rate": 1.1544460684898261e-05,
|
|
"loss": 0.4028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39986366033554077,
|
|
"step": 6455,
|
|
"valid_targets_mean": 28739.9,
|
|
"valid_targets_min": 20314
|
|
},
|
|
{
|
|
"epoch": 6.894343649946638,
|
|
"grad_norm": 0.17853792674427735,
|
|
"learning_rate": 1.1436091828281348e-05,
|
|
"loss": 0.4006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40273600816726685,
|
|
"step": 6460,
|
|
"valid_targets_mean": 28903.6,
|
|
"valid_targets_min": 20874
|
|
},
|
|
{
|
|
"epoch": 6.899679829242262,
|
|
"grad_norm": 0.1557539114073379,
|
|
"learning_rate": 1.1328203151210915e-05,
|
|
"loss": 0.4026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40460121631622314,
|
|
"step": 6465,
|
|
"valid_targets_mean": 28848.5,
|
|
"valid_targets_min": 21189
|
|
},
|
|
{
|
|
"epoch": 6.905016008537887,
|
|
"grad_norm": 0.16440118079599428,
|
|
"learning_rate": 1.1220795238642357e-05,
|
|
"loss": 0.3998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3983585834503174,
|
|
"step": 6470,
|
|
"valid_targets_mean": 28828.2,
|
|
"valid_targets_min": 23214
|
|
},
|
|
{
|
|
"epoch": 6.910352187833511,
|
|
"grad_norm": 0.15390341169540844,
|
|
"learning_rate": 1.1113868672924422e-05,
|
|
"loss": 0.4003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3979727327823639,
|
|
"step": 6475,
|
|
"valid_targets_mean": 28908.6,
|
|
"valid_targets_min": 22500
|
|
},
|
|
{
|
|
"epoch": 6.915688367129135,
|
|
"grad_norm": 0.156756752893968,
|
|
"learning_rate": 1.1007424033796087e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40027040243148804,
|
|
"step": 6480,
|
|
"valid_targets_mean": 28882.9,
|
|
"valid_targets_min": 20069
|
|
},
|
|
{
|
|
"epoch": 6.92102454642476,
|
|
"grad_norm": 0.14466757537942485,
|
|
"learning_rate": 1.0901461898383424e-05,
|
|
"loss": 0.4007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39898619055747986,
|
|
"step": 6485,
|
|
"valid_targets_mean": 28843.0,
|
|
"valid_targets_min": 18406
|
|
},
|
|
{
|
|
"epoch": 6.926360725720384,
|
|
"grad_norm": 0.16297822304878332,
|
|
"learning_rate": 1.079598284119644e-05,
|
|
"loss": 0.4019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40116146206855774,
|
|
"step": 6490,
|
|
"valid_targets_mean": 28951.9,
|
|
"valid_targets_min": 18291
|
|
},
|
|
{
|
|
"epoch": 6.931696905016008,
|
|
"grad_norm": 0.17699960140786858,
|
|
"learning_rate": 1.06909874341259e-05,
|
|
"loss": 0.4016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4010434150695801,
|
|
"step": 6495,
|
|
"valid_targets_mean": 28819.8,
|
|
"valid_targets_min": 21154
|
|
},
|
|
{
|
|
"epoch": 6.937033084311633,
|
|
"grad_norm": 0.15436844588832435,
|
|
"learning_rate": 1.0586476246440403e-05,
|
|
"loss": 0.4009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39718085527420044,
|
|
"step": 6500,
|
|
"valid_targets_mean": 28895.1,
|
|
"valid_targets_min": 22361
|
|
},
|
|
{
|
|
"epoch": 6.942369263607257,
|
|
"grad_norm": 0.15191274323938955,
|
|
"learning_rate": 1.0482449844783093e-05,
|
|
"loss": 0.4003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40134063363075256,
|
|
"step": 6505,
|
|
"valid_targets_mean": 28845.7,
|
|
"valid_targets_min": 21166
|
|
},
|
|
{
|
|
"epoch": 6.9477054429028815,
|
|
"grad_norm": 0.16467666322186136,
|
|
"learning_rate": 1.0378908793168718e-05,
|
|
"loss": 0.4003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40012848377227783,
|
|
"step": 6510,
|
|
"valid_targets_mean": 28919.9,
|
|
"valid_targets_min": 20421
|
|
},
|
|
{
|
|
"epoch": 6.953041622198506,
|
|
"grad_norm": 0.16302817190639357,
|
|
"learning_rate": 1.0275853652980571e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3995431959629059,
|
|
"step": 6515,
|
|
"valid_targets_mean": 28810.1,
|
|
"valid_targets_min": 20639
|
|
},
|
|
{
|
|
"epoch": 6.95837780149413,
|
|
"grad_norm": 0.15702759706035485,
|
|
"learning_rate": 1.0173284982967335e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3988278806209564,
|
|
"step": 6520,
|
|
"valid_targets_mean": 28847.2,
|
|
"valid_targets_min": 20450
|
|
},
|
|
{
|
|
"epoch": 6.963713980789755,
|
|
"grad_norm": 0.14701600676799004,
|
|
"learning_rate": 1.0071203339240198e-05,
|
|
"loss": 0.4019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4036068916320801,
|
|
"step": 6525,
|
|
"valid_targets_mean": 28974.2,
|
|
"valid_targets_min": 22440
|
|
},
|
|
{
|
|
"epoch": 6.969050160085379,
|
|
"grad_norm": 0.1603744815668825,
|
|
"learning_rate": 9.96960927526972e-06,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40067869424819946,
|
|
"step": 6530,
|
|
"valid_targets_mean": 28709.8,
|
|
"valid_targets_min": 16672
|
|
},
|
|
{
|
|
"epoch": 6.974386339381003,
|
|
"grad_norm": 0.15407947241724276,
|
|
"learning_rate": 9.868503341882872e-06,
|
|
"loss": 0.4005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3975338935852051,
|
|
"step": 6535,
|
|
"valid_targets_mean": 28773.6,
|
|
"valid_targets_min": 19931
|
|
},
|
|
{
|
|
"epoch": 6.979722518676628,
|
|
"grad_norm": 0.15866317471533062,
|
|
"learning_rate": 9.767886087260125e-06,
|
|
"loss": 0.3991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40186864137649536,
|
|
"step": 6540,
|
|
"valid_targets_mean": 28824.8,
|
|
"valid_targets_min": 19711
|
|
},
|
|
{
|
|
"epoch": 6.985058697972252,
|
|
"grad_norm": 0.1603353701551661,
|
|
"learning_rate": 9.667758056932341e-06,
|
|
"loss": 0.4001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.398986279964447,
|
|
"step": 6545,
|
|
"valid_targets_mean": 28972.2,
|
|
"valid_targets_min": 22728
|
|
},
|
|
{
|
|
"epoch": 6.990394877267876,
|
|
"grad_norm": 0.15628933829566188,
|
|
"learning_rate": 9.568119793777952e-06,
|
|
"loss": 0.4004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.402820885181427,
|
|
"step": 6550,
|
|
"valid_targets_mean": 28688.5,
|
|
"valid_targets_min": 19256
|
|
},
|
|
{
|
|
"epoch": 6.995731056563501,
|
|
"grad_norm": 0.15171731237911634,
|
|
"learning_rate": 9.468971838019869e-06,
|
|
"loss": 0.3978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3992854356765747,
|
|
"step": 6555,
|
|
"valid_targets_mean": 28961.2,
|
|
"valid_targets_min": 21536
|
|
},
|
|
{
|
|
"epoch": 7.001067235859125,
|
|
"grad_norm": 0.15330599997099978,
|
|
"learning_rate": 9.370314727222718e-06,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40003207325935364,
|
|
"step": 6560,
|
|
"valid_targets_mean": 28604.1,
|
|
"valid_targets_min": 20828
|
|
},
|
|
{
|
|
"epoch": 7.006403415154749,
|
|
"grad_norm": 0.16415267947660667,
|
|
"learning_rate": 9.272148996289765e-06,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4015958905220032,
|
|
"step": 6565,
|
|
"valid_targets_mean": 28911.6,
|
|
"valid_targets_min": 22307
|
|
},
|
|
{
|
|
"epoch": 7.011739594450374,
|
|
"grad_norm": 0.15677689118830038,
|
|
"learning_rate": 9.174475177460096e-06,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3973429501056671,
|
|
"step": 6570,
|
|
"valid_targets_mean": 28885.8,
|
|
"valid_targets_min": 21403
|
|
},
|
|
{
|
|
"epoch": 7.017075773745998,
|
|
"grad_norm": 0.1555057933551184,
|
|
"learning_rate": 9.077293800305842e-06,
|
|
"loss": 0.4007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39989423751831055,
|
|
"step": 6575,
|
|
"valid_targets_mean": 28826.1,
|
|
"valid_targets_min": 21106
|
|
},
|
|
{
|
|
"epoch": 7.0224119530416225,
|
|
"grad_norm": 0.15198025809307297,
|
|
"learning_rate": 8.980605391729035e-06,
|
|
"loss": 0.3983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39894890785217285,
|
|
"step": 6580,
|
|
"valid_targets_mean": 28534.4,
|
|
"valid_targets_min": 19891
|
|
},
|
|
{
|
|
"epoch": 7.027748132337247,
|
|
"grad_norm": 0.14452981870722426,
|
|
"learning_rate": 8.884410475959026e-06,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3982957601547241,
|
|
"step": 6585,
|
|
"valid_targets_mean": 28677.5,
|
|
"valid_targets_min": 14588
|
|
},
|
|
{
|
|
"epoch": 7.033084311632871,
|
|
"grad_norm": 0.15139167888669983,
|
|
"learning_rate": 8.788709574549469e-06,
|
|
"loss": 0.3964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39983707666397095,
|
|
"step": 6590,
|
|
"valid_targets_mean": 28721.9,
|
|
"valid_targets_min": 20030
|
|
},
|
|
{
|
|
"epoch": 7.0384204909284955,
|
|
"grad_norm": 0.17697283738640873,
|
|
"learning_rate": 8.693503206375531e-06,
|
|
"loss": 0.3989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39999592304229736,
|
|
"step": 6595,
|
|
"valid_targets_mean": 28832.6,
|
|
"valid_targets_min": 20481
|
|
},
|
|
{
|
|
"epoch": 7.04375667022412,
|
|
"grad_norm": 0.1463523461872221,
|
|
"learning_rate": 8.598791887631164e-06,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39724496006965637,
|
|
"step": 6600,
|
|
"valid_targets_mean": 28774.0,
|
|
"valid_targets_min": 22752
|
|
},
|
|
{
|
|
"epoch": 7.049092849519744,
|
|
"grad_norm": 0.14902110723811787,
|
|
"learning_rate": 8.504576131826181e-06,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3978765904903412,
|
|
"step": 6605,
|
|
"valid_targets_mean": 29010.4,
|
|
"valid_targets_min": 22184
|
|
},
|
|
{
|
|
"epoch": 7.054429028815369,
|
|
"grad_norm": 0.1473453761555798,
|
|
"learning_rate": 8.4108564497836e-06,
|
|
"loss": 0.3984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3956196904182434,
|
|
"step": 6610,
|
|
"valid_targets_mean": 28715.8,
|
|
"valid_targets_min": 18406
|
|
},
|
|
{
|
|
"epoch": 7.059765208110993,
|
|
"grad_norm": 0.1550612916087296,
|
|
"learning_rate": 8.317633349636712e-06,
|
|
"loss": 0.3967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39694520831108093,
|
|
"step": 6615,
|
|
"valid_targets_mean": 28844.7,
|
|
"valid_targets_min": 21374
|
|
},
|
|
{
|
|
"epoch": 7.065101387406617,
|
|
"grad_norm": 0.1568547835311203,
|
|
"learning_rate": 8.224907336826504e-06,
|
|
"loss": 0.3987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3986090421676636,
|
|
"step": 6620,
|
|
"valid_targets_mean": 28702.4,
|
|
"valid_targets_min": 19817
|
|
},
|
|
{
|
|
"epoch": 7.070437566702241,
|
|
"grad_norm": 0.14798714715897823,
|
|
"learning_rate": 8.132678914098779e-06,
|
|
"loss": 0.3988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39725714921951294,
|
|
"step": 6625,
|
|
"valid_targets_mean": 28755.3,
|
|
"valid_targets_min": 19321
|
|
},
|
|
{
|
|
"epoch": 7.075773745997865,
|
|
"grad_norm": 0.15140895432312573,
|
|
"learning_rate": 8.040948581501495e-06,
|
|
"loss": 0.3983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39604485034942627,
|
|
"step": 6630,
|
|
"valid_targets_mean": 28734.3,
|
|
"valid_targets_min": 20528
|
|
},
|
|
{
|
|
"epoch": 7.0811099252934895,
|
|
"grad_norm": 0.14021405335849116,
|
|
"learning_rate": 7.949716836382048e-06,
|
|
"loss": 0.3961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3950844407081604,
|
|
"step": 6635,
|
|
"valid_targets_mean": 28809.1,
|
|
"valid_targets_min": 18632
|
|
},
|
|
{
|
|
"epoch": 7.086446104589114,
|
|
"grad_norm": 0.14555571536553902,
|
|
"learning_rate": 7.858984173384598e-06,
|
|
"loss": 0.3996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3982555866241455,
|
|
"step": 6640,
|
|
"valid_targets_mean": 28952.2,
|
|
"valid_targets_min": 18491
|
|
},
|
|
{
|
|
"epoch": 7.091782283884738,
|
|
"grad_norm": 0.14723295242994291,
|
|
"learning_rate": 7.768751084447357e-06,
|
|
"loss": 0.3967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3947444558143616,
|
|
"step": 6645,
|
|
"valid_targets_mean": 28932.8,
|
|
"valid_targets_min": 22607
|
|
},
|
|
{
|
|
"epoch": 7.0971184631803625,
|
|
"grad_norm": 0.15320077756778971,
|
|
"learning_rate": 7.67901805879987e-06,
|
|
"loss": 0.3968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39684662222862244,
|
|
"step": 6650,
|
|
"valid_targets_mean": 28865.5,
|
|
"valid_targets_min": 23157
|
|
},
|
|
{
|
|
"epoch": 7.102454642475987,
|
|
"grad_norm": 0.1544152537510823,
|
|
"learning_rate": 7.589785582960441e-06,
|
|
"loss": 0.3981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39948925375938416,
|
|
"step": 6655,
|
|
"valid_targets_mean": 28771.1,
|
|
"valid_targets_min": 18799
|
|
},
|
|
{
|
|
"epoch": 7.107790821771611,
|
|
"grad_norm": 0.14117398692965866,
|
|
"learning_rate": 7.501054140733521e-06,
|
|
"loss": 0.398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39882612228393555,
|
|
"step": 6660,
|
|
"valid_targets_mean": 28572.9,
|
|
"valid_targets_min": 20299
|
|
},
|
|
{
|
|
"epoch": 7.113127001067236,
|
|
"grad_norm": 0.16295455416882793,
|
|
"learning_rate": 7.412824213206981e-06,
|
|
"loss": 0.3973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39467161893844604,
|
|
"step": 6665,
|
|
"valid_targets_mean": 28756.4,
|
|
"valid_targets_min": 19687
|
|
},
|
|
{
|
|
"epoch": 7.11846318036286,
|
|
"grad_norm": 0.15492019017386385,
|
|
"learning_rate": 7.325096278749599e-06,
|
|
"loss": 0.3957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39982277154922485,
|
|
"step": 6670,
|
|
"valid_targets_mean": 28843.5,
|
|
"valid_targets_min": 21406
|
|
},
|
|
{
|
|
"epoch": 7.123799359658484,
|
|
"grad_norm": 0.14707840331415228,
|
|
"learning_rate": 7.237870813008362e-06,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39708107709884644,
|
|
"step": 6675,
|
|
"valid_targets_mean": 28789.7,
|
|
"valid_targets_min": 19764
|
|
},
|
|
{
|
|
"epoch": 7.129135538954109,
|
|
"grad_norm": 0.14771471386609525,
|
|
"learning_rate": 7.151148288906051e-06,
|
|
"loss": 0.399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39893800020217896,
|
|
"step": 6680,
|
|
"valid_targets_mean": 28768.1,
|
|
"valid_targets_min": 20127
|
|
},
|
|
{
|
|
"epoch": 7.134471718249733,
|
|
"grad_norm": 0.14680754081055059,
|
|
"learning_rate": 7.064929176638469e-06,
|
|
"loss": 0.3967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3978503346443176,
|
|
"step": 6685,
|
|
"valid_targets_mean": 28842.6,
|
|
"valid_targets_min": 22315
|
|
},
|
|
{
|
|
"epoch": 7.139807897545357,
|
|
"grad_norm": 0.14806307005666994,
|
|
"learning_rate": 6.979213943672058e-06,
|
|
"loss": 0.3987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40188246965408325,
|
|
"step": 6690,
|
|
"valid_targets_mean": 28730.9,
|
|
"valid_targets_min": 18655
|
|
},
|
|
{
|
|
"epoch": 7.145144076840982,
|
|
"grad_norm": 0.14834292296077992,
|
|
"learning_rate": 6.8940030547413044e-06,
|
|
"loss": 0.3974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39668893814086914,
|
|
"step": 6695,
|
|
"valid_targets_mean": 28693.6,
|
|
"valid_targets_min": 21590
|
|
},
|
|
{
|
|
"epoch": 7.150480256136606,
|
|
"grad_norm": 0.14362800777994875,
|
|
"learning_rate": 6.809296971846213e-06,
|
|
"loss": 0.3978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39519941806793213,
|
|
"step": 6700,
|
|
"valid_targets_mean": 28812.1,
|
|
"valid_targets_min": 22316
|
|
},
|
|
{
|
|
"epoch": 7.1558164354322304,
|
|
"grad_norm": 0.1769964892974021,
|
|
"learning_rate": 6.725096154249822e-06,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39827579259872437,
|
|
"step": 6705,
|
|
"valid_targets_mean": 28764.0,
|
|
"valid_targets_min": 21474
|
|
},
|
|
{
|
|
"epoch": 7.161152614727855,
|
|
"grad_norm": 0.14440606541634426,
|
|
"learning_rate": 6.6414010584756826e-06,
|
|
"loss": 0.3968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3953492343425751,
|
|
"step": 6710,
|
|
"valid_targets_mean": 28923.6,
|
|
"valid_targets_min": 23038
|
|
},
|
|
{
|
|
"epoch": 7.166488794023479,
|
|
"grad_norm": 0.14340621091725506,
|
|
"learning_rate": 6.558212138305375e-06,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3969920873641968,
|
|
"step": 6715,
|
|
"valid_targets_mean": 28838.3,
|
|
"valid_targets_min": 22743
|
|
},
|
|
{
|
|
"epoch": 7.1718249733191035,
|
|
"grad_norm": 0.1390070556862948,
|
|
"learning_rate": 6.475529844776163e-06,
|
|
"loss": 0.3954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3945442736148834,
|
|
"step": 6720,
|
|
"valid_targets_mean": 28672.8,
|
|
"valid_targets_min": 17816
|
|
},
|
|
{
|
|
"epoch": 7.177161152614728,
|
|
"grad_norm": 0.13867783752463655,
|
|
"learning_rate": 6.3933546261783496e-06,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3949880301952362,
|
|
"step": 6725,
|
|
"valid_targets_mean": 28804.1,
|
|
"valid_targets_min": 15165
|
|
},
|
|
{
|
|
"epoch": 7.182497331910352,
|
|
"grad_norm": 0.14234016351718132,
|
|
"learning_rate": 6.3116869280530514e-06,
|
|
"loss": 0.3986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3978882133960724,
|
|
"step": 6730,
|
|
"valid_targets_mean": 28754.6,
|
|
"valid_targets_min": 19396
|
|
},
|
|
{
|
|
"epoch": 7.187833511205977,
|
|
"grad_norm": 0.13609739413508615,
|
|
"learning_rate": 6.230527193189639e-06,
|
|
"loss": 0.3978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39371705055236816,
|
|
"step": 6735,
|
|
"valid_targets_mean": 29004.2,
|
|
"valid_targets_min": 21072
|
|
},
|
|
{
|
|
"epoch": 7.193169690501601,
|
|
"grad_norm": 0.14313698108119827,
|
|
"learning_rate": 6.149875861623411e-06,
|
|
"loss": 0.3968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3963935077190399,
|
|
"step": 6740,
|
|
"valid_targets_mean": 28970.4,
|
|
"valid_targets_min": 22776
|
|
},
|
|
{
|
|
"epoch": 7.198505869797225,
|
|
"grad_norm": 0.17382222681898493,
|
|
"learning_rate": 6.069733370633168e-06,
|
|
"loss": 0.3986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4010206460952759,
|
|
"step": 6745,
|
|
"valid_targets_mean": 28954.6,
|
|
"valid_targets_min": 22455
|
|
},
|
|
{
|
|
"epoch": 7.20384204909285,
|
|
"grad_norm": 0.13672333265107187,
|
|
"learning_rate": 5.990100154738842e-06,
|
|
"loss": 0.3974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3966425955295563,
|
|
"step": 6750,
|
|
"valid_targets_mean": 28855.2,
|
|
"valid_targets_min": 19999
|
|
},
|
|
{
|
|
"epoch": 7.209178228388474,
|
|
"grad_norm": 0.14133605489320078,
|
|
"learning_rate": 5.910976645699174e-06,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3986111879348755,
|
|
"step": 6755,
|
|
"valid_targets_mean": 28904.9,
|
|
"valid_targets_min": 20203
|
|
},
|
|
{
|
|
"epoch": 7.214514407684098,
|
|
"grad_norm": 0.13676766532037,
|
|
"learning_rate": 5.832363272509378e-06,
|
|
"loss": 0.3982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3987554907798767,
|
|
"step": 6760,
|
|
"valid_targets_mean": 28837.3,
|
|
"valid_targets_min": 22529
|
|
},
|
|
{
|
|
"epoch": 7.219850586979723,
|
|
"grad_norm": 0.1407521511478025,
|
|
"learning_rate": 5.754260461398764e-06,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40251851081848145,
|
|
"step": 6765,
|
|
"valid_targets_mean": 28777.5,
|
|
"valid_targets_min": 17618
|
|
},
|
|
{
|
|
"epoch": 7.225186766275347,
|
|
"grad_norm": 0.14439183596932256,
|
|
"learning_rate": 5.6766686358284525e-06,
|
|
"loss": 0.399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40174761414527893,
|
|
"step": 6770,
|
|
"valid_targets_mean": 28680.2,
|
|
"valid_targets_min": 22633
|
|
},
|
|
{
|
|
"epoch": 7.230522945570971,
|
|
"grad_norm": 0.14054950386320145,
|
|
"learning_rate": 5.599588216489071e-06,
|
|
"loss": 0.4004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39928752183914185,
|
|
"step": 6775,
|
|
"valid_targets_mean": 28880.4,
|
|
"valid_targets_min": 22468
|
|
},
|
|
{
|
|
"epoch": 7.235859124866596,
|
|
"grad_norm": 0.14264311473697092,
|
|
"learning_rate": 5.523019621298542e-06,
|
|
"loss": 0.3969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39656123518943787,
|
|
"step": 6780,
|
|
"valid_targets_mean": 28882.2,
|
|
"valid_targets_min": 22228
|
|
},
|
|
{
|
|
"epoch": 7.24119530416222,
|
|
"grad_norm": 0.13433578292405557,
|
|
"learning_rate": 5.446963265399674e-06,
|
|
"loss": 0.3982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39557063579559326,
|
|
"step": 6785,
|
|
"valid_targets_mean": 28956.7,
|
|
"valid_targets_min": 22959
|
|
},
|
|
{
|
|
"epoch": 7.2465314834578445,
|
|
"grad_norm": 0.1407962413902608,
|
|
"learning_rate": 5.371419561158042e-06,
|
|
"loss": 0.3986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4022854268550873,
|
|
"step": 6790,
|
|
"valid_targets_mean": 28571.2,
|
|
"valid_targets_min": 14549
|
|
},
|
|
{
|
|
"epoch": 7.251867662753469,
|
|
"grad_norm": 0.14403343716608968,
|
|
"learning_rate": 5.296388918159723e-06,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39685022830963135,
|
|
"step": 6795,
|
|
"valid_targets_mean": 28883.9,
|
|
"valid_targets_min": 20478
|
|
},
|
|
{
|
|
"epoch": 7.257203842049093,
|
|
"grad_norm": 0.14013211142564314,
|
|
"learning_rate": 5.221871743209039e-06,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3962429165840149,
|
|
"step": 6800,
|
|
"valid_targets_mean": 28686.2,
|
|
"valid_targets_min": 21420
|
|
},
|
|
{
|
|
"epoch": 7.2625400213447175,
|
|
"grad_norm": 0.13661800280592126,
|
|
"learning_rate": 5.1478684403263425e-06,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3977741003036499,
|
|
"step": 6805,
|
|
"valid_targets_mean": 28841.9,
|
|
"valid_targets_min": 21571
|
|
},
|
|
{
|
|
"epoch": 7.267876200640342,
|
|
"grad_norm": 0.13806049467788378,
|
|
"learning_rate": 5.0743794107458664e-06,
|
|
"loss": 0.3978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3959900140762329,
|
|
"step": 6810,
|
|
"valid_targets_mean": 28860.0,
|
|
"valid_targets_min": 18804
|
|
},
|
|
{
|
|
"epoch": 7.273212379935966,
|
|
"grad_norm": 0.13400193213805908,
|
|
"learning_rate": 5.001405052913577e-06,
|
|
"loss": 0.3978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3986443877220154,
|
|
"step": 6815,
|
|
"valid_targets_mean": 28876.5,
|
|
"valid_targets_min": 22960
|
|
},
|
|
{
|
|
"epoch": 7.27854855923159,
|
|
"grad_norm": 0.1318872747199963,
|
|
"learning_rate": 4.9289457624848714e-06,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40434730052948,
|
|
"step": 6820,
|
|
"valid_targets_mean": 28844.1,
|
|
"valid_targets_min": 17444
|
|
},
|
|
{
|
|
"epoch": 7.283884738527215,
|
|
"grad_norm": 0.13980805664155702,
|
|
"learning_rate": 4.85700193232268e-06,
|
|
"loss": 0.3953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39328184723854065,
|
|
"step": 6825,
|
|
"valid_targets_mean": 28844.9,
|
|
"valid_targets_min": 21078
|
|
},
|
|
{
|
|
"epoch": 7.289220917822838,
|
|
"grad_norm": 0.14814865784308243,
|
|
"learning_rate": 4.785573952495059e-06,
|
|
"loss": 0.3986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39459824562072754,
|
|
"step": 6830,
|
|
"valid_targets_mean": 28762.5,
|
|
"valid_targets_min": 21269
|
|
},
|
|
{
|
|
"epoch": 7.294557097118463,
|
|
"grad_norm": 0.1477617513226932,
|
|
"learning_rate": 4.714662210273246e-06,
|
|
"loss": 0.3998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.398124635219574,
|
|
"step": 6835,
|
|
"valid_targets_mean": 28962.0,
|
|
"valid_targets_min": 20359
|
|
},
|
|
{
|
|
"epoch": 7.299893276414087,
|
|
"grad_norm": 0.13491286518951615,
|
|
"learning_rate": 4.644267090129562e-06,
|
|
"loss": 0.399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39378926157951355,
|
|
"step": 6840,
|
|
"valid_targets_mean": 28842.9,
|
|
"valid_targets_min": 22195
|
|
},
|
|
{
|
|
"epoch": 7.3052294557097115,
|
|
"grad_norm": 0.13590823625556828,
|
|
"learning_rate": 4.5743889737352215e-06,
|
|
"loss": 0.3987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39613455533981323,
|
|
"step": 6845,
|
|
"valid_targets_mean": 28674.6,
|
|
"valid_targets_min": 20348
|
|
},
|
|
{
|
|
"epoch": 7.310565635005336,
|
|
"grad_norm": 0.14015434698468512,
|
|
"learning_rate": 4.505028239958353e-06,
|
|
"loss": 0.3948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39379096031188965,
|
|
"step": 6850,
|
|
"valid_targets_mean": 28751.0,
|
|
"valid_targets_min": 16576
|
|
},
|
|
{
|
|
"epoch": 7.31590181430096,
|
|
"grad_norm": 0.1375274836111597,
|
|
"learning_rate": 4.436185264861936e-06,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39162224531173706,
|
|
"step": 6855,
|
|
"valid_targets_mean": 28707.3,
|
|
"valid_targets_min": 19748
|
|
},
|
|
{
|
|
"epoch": 7.321237993596585,
|
|
"grad_norm": 0.15924018291017472,
|
|
"learning_rate": 4.367860421701731e-06,
|
|
"loss": 0.3959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3961021602153778,
|
|
"step": 6860,
|
|
"valid_targets_mean": 28817.5,
|
|
"valid_targets_min": 21668
|
|
},
|
|
{
|
|
"epoch": 7.326574172892209,
|
|
"grad_norm": 0.13253022328531194,
|
|
"learning_rate": 4.300054080924254e-06,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3973386287689209,
|
|
"step": 6865,
|
|
"valid_targets_mean": 28927.5,
|
|
"valid_targets_min": 18508
|
|
},
|
|
{
|
|
"epoch": 7.331910352187833,
|
|
"grad_norm": 0.13692155487606714,
|
|
"learning_rate": 4.2327666101647735e-06,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39944925904273987,
|
|
"step": 6870,
|
|
"valid_targets_mean": 28797.9,
|
|
"valid_targets_min": 20510
|
|
},
|
|
{
|
|
"epoch": 7.337246531483458,
|
|
"grad_norm": 0.13264873336052005,
|
|
"learning_rate": 4.165998374245395e-06,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3951477110385895,
|
|
"step": 6875,
|
|
"valid_targets_mean": 28752.1,
|
|
"valid_targets_min": 17730
|
|
},
|
|
{
|
|
"epoch": 7.342582710779082,
|
|
"grad_norm": 0.14478248166830496,
|
|
"learning_rate": 4.099749735172909e-06,
|
|
"loss": 0.3989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3972729444503784,
|
|
"step": 6880,
|
|
"valid_targets_mean": 28750.5,
|
|
"valid_targets_min": 22412
|
|
},
|
|
{
|
|
"epoch": 7.347918890074706,
|
|
"grad_norm": 0.1385750912506846,
|
|
"learning_rate": 4.034021052137027e-06,
|
|
"loss": 0.397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3962293565273285,
|
|
"step": 6885,
|
|
"valid_targets_mean": 28836.2,
|
|
"valid_targets_min": 20189
|
|
},
|
|
{
|
|
"epoch": 7.353255069370331,
|
|
"grad_norm": 0.14214062049125484,
|
|
"learning_rate": 3.968812681508316e-06,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39318692684173584,
|
|
"step": 6890,
|
|
"valid_targets_mean": 28721.9,
|
|
"valid_targets_min": 21114
|
|
},
|
|
{
|
|
"epoch": 7.358591248665955,
|
|
"grad_norm": 0.1338862866468272,
|
|
"learning_rate": 3.904124976836254e-06,
|
|
"loss": 0.3984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3970756232738495,
|
|
"step": 6895,
|
|
"valid_targets_mean": 28713.5,
|
|
"valid_targets_min": 19112
|
|
},
|
|
{
|
|
"epoch": 7.363927427961579,
|
|
"grad_norm": 0.14940392657549093,
|
|
"learning_rate": 3.8399582888473895e-06,
|
|
"loss": 0.3958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3941649794578552,
|
|
"step": 6900,
|
|
"valid_targets_mean": 28974.6,
|
|
"valid_targets_min": 20549
|
|
},
|
|
{
|
|
"epoch": 7.369263607257204,
|
|
"grad_norm": 0.13475410359357387,
|
|
"learning_rate": 3.776312965443374e-06,
|
|
"loss": 0.3972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3961244821548462,
|
|
"step": 6905,
|
|
"valid_targets_mean": 28744.7,
|
|
"valid_targets_min": 20562
|
|
},
|
|
{
|
|
"epoch": 7.374599786552828,
|
|
"grad_norm": 0.13723555168541482,
|
|
"learning_rate": 3.713189351699131e-06,
|
|
"loss": 0.3981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40024417638778687,
|
|
"step": 6910,
|
|
"valid_targets_mean": 28850.6,
|
|
"valid_targets_min": 21788
|
|
},
|
|
{
|
|
"epoch": 7.3799359658484525,
|
|
"grad_norm": 0.1332438575036423,
|
|
"learning_rate": 3.6505877898609263e-06,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.399570494890213,
|
|
"step": 6915,
|
|
"valid_targets_mean": 28985.8,
|
|
"valid_targets_min": 22477
|
|
},
|
|
{
|
|
"epoch": 7.385272145144077,
|
|
"grad_norm": 0.13848205951792947,
|
|
"learning_rate": 3.5885086193445883e-06,
|
|
"loss": 0.3964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3938809633255005,
|
|
"step": 6920,
|
|
"valid_targets_mean": 28860.3,
|
|
"valid_targets_min": 22807
|
|
},
|
|
{
|
|
"epoch": 7.390608324439701,
|
|
"grad_norm": 0.13650005467831897,
|
|
"learning_rate": 3.526952176733578e-06,
|
|
"loss": 0.3948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39737313985824585,
|
|
"step": 6925,
|
|
"valid_targets_mean": 28816.6,
|
|
"valid_targets_min": 21229
|
|
},
|
|
{
|
|
"epoch": 7.3959445037353255,
|
|
"grad_norm": 0.12888511781642153,
|
|
"learning_rate": 3.46591879577719e-06,
|
|
"loss": 0.3977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3936184048652649,
|
|
"step": 6930,
|
|
"valid_targets_mean": 28959.3,
|
|
"valid_targets_min": 17707
|
|
},
|
|
{
|
|
"epoch": 7.40128068303095,
|
|
"grad_norm": 0.13263492604068003,
|
|
"learning_rate": 3.4054088073888435e-06,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3939823508262634,
|
|
"step": 6935,
|
|
"valid_targets_mean": 28700.5,
|
|
"valid_targets_min": 21618
|
|
},
|
|
{
|
|
"epoch": 7.406616862326574,
|
|
"grad_norm": 0.13102061046697075,
|
|
"learning_rate": 3.3454225396441164e-06,
|
|
"loss": 0.3973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.397549033164978,
|
|
"step": 6940,
|
|
"valid_targets_mean": 28769.5,
|
|
"valid_targets_min": 20988
|
|
},
|
|
{
|
|
"epoch": 7.411953041622199,
|
|
"grad_norm": 0.13206284597273021,
|
|
"learning_rate": 3.285960317779102e-06,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39504343271255493,
|
|
"step": 6945,
|
|
"valid_targets_mean": 28798.3,
|
|
"valid_targets_min": 19081
|
|
},
|
|
{
|
|
"epoch": 7.417289220917823,
|
|
"grad_norm": 0.14247388308189723,
|
|
"learning_rate": 3.2270224641886117e-06,
|
|
"loss": 0.3998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39570754766464233,
|
|
"step": 6950,
|
|
"valid_targets_mean": 28884.1,
|
|
"valid_targets_min": 21183
|
|
},
|
|
{
|
|
"epoch": 7.422625400213447,
|
|
"grad_norm": 0.12933879296966458,
|
|
"learning_rate": 3.168609298424374e-06,
|
|
"loss": 0.3941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39268964529037476,
|
|
"step": 6955,
|
|
"valid_targets_mean": 28773.8,
|
|
"valid_targets_min": 21747
|
|
},
|
|
{
|
|
"epoch": 7.427961579509072,
|
|
"grad_norm": 0.13258932817271468,
|
|
"learning_rate": 3.110721137193384e-06,
|
|
"loss": 0.397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3964608907699585,
|
|
"step": 6960,
|
|
"valid_targets_mean": 28910.0,
|
|
"valid_targets_min": 21896
|
|
},
|
|
{
|
|
"epoch": 7.433297758804696,
|
|
"grad_norm": 0.14054028295501642,
|
|
"learning_rate": 3.053358294356101e-06,
|
|
"loss": 0.3953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39511436223983765,
|
|
"step": 6965,
|
|
"valid_targets_mean": 28822.8,
|
|
"valid_targets_min": 22606
|
|
},
|
|
{
|
|
"epoch": 7.43863393810032,
|
|
"grad_norm": 0.1447540553523416,
|
|
"learning_rate": 2.9965210809248412e-06,
|
|
"loss": 0.3988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3964189887046814,
|
|
"step": 6970,
|
|
"valid_targets_mean": 28971.0,
|
|
"valid_targets_min": 20946
|
|
},
|
|
{
|
|
"epoch": 7.443970117395945,
|
|
"grad_norm": 0.12916593251265385,
|
|
"learning_rate": 2.9402098050619775e-06,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3927098512649536,
|
|
"step": 6975,
|
|
"valid_targets_mean": 29101.2,
|
|
"valid_targets_min": 20495
|
|
},
|
|
{
|
|
"epoch": 7.449306296691569,
|
|
"grad_norm": 0.15834400094498974,
|
|
"learning_rate": 2.884424772078398e-06,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39576098322868347,
|
|
"step": 6980,
|
|
"valid_targets_mean": 28813.8,
|
|
"valid_targets_min": 21778
|
|
},
|
|
{
|
|
"epoch": 7.454642475987193,
|
|
"grad_norm": 0.13571641243628318,
|
|
"learning_rate": 2.829166284431761e-06,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3991106152534485,
|
|
"step": 6985,
|
|
"valid_targets_mean": 28901.6,
|
|
"valid_targets_min": 19732
|
|
},
|
|
{
|
|
"epoch": 7.459978655282818,
|
|
"grad_norm": 0.1331103948023071,
|
|
"learning_rate": 2.7744346417248546e-06,
|
|
"loss": 0.3974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3965522050857544,
|
|
"step": 6990,
|
|
"valid_targets_mean": 28912.5,
|
|
"valid_targets_min": 17680
|
|
},
|
|
{
|
|
"epoch": 7.465314834578442,
|
|
"grad_norm": 0.1319675964651442,
|
|
"learning_rate": 2.7202301407040274e-06,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3990844488143921,
|
|
"step": 6995,
|
|
"valid_targets_mean": 28948.4,
|
|
"valid_targets_min": 19629
|
|
},
|
|
{
|
|
"epoch": 7.4706510138740665,
|
|
"grad_norm": 0.1405373888533213,
|
|
"learning_rate": 2.6665530752575274e-06,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3977741599082947,
|
|
"step": 7000,
|
|
"valid_targets_mean": 28642.5,
|
|
"valid_targets_min": 19049
|
|
},
|
|
{
|
|
"epoch": 7.475987193169691,
|
|
"grad_norm": 0.12816007543103924,
|
|
"learning_rate": 2.613403736413944e-06,
|
|
"loss": 0.3982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3968815803527832,
|
|
"step": 7005,
|
|
"valid_targets_mean": 28858.9,
|
|
"valid_targets_min": 22519
|
|
},
|
|
{
|
|
"epoch": 7.481323372465315,
|
|
"grad_norm": 0.13763866771563157,
|
|
"learning_rate": 2.560782412340623e-06,
|
|
"loss": 0.398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3996983766555786,
|
|
"step": 7010,
|
|
"valid_targets_mean": 28726.6,
|
|
"valid_targets_min": 19324
|
|
},
|
|
{
|
|
"epoch": 7.48665955176094,
|
|
"grad_norm": 0.13402555031487307,
|
|
"learning_rate": 2.508689388342078e-06,
|
|
"loss": 0.3973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4007349908351898,
|
|
"step": 7015,
|
|
"valid_targets_mean": 28784.0,
|
|
"valid_targets_min": 20900
|
|
},
|
|
{
|
|
"epoch": 7.491995731056564,
|
|
"grad_norm": 0.13388666886846984,
|
|
"learning_rate": 2.45712494685848e-06,
|
|
"loss": 0.3977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.399408757686615,
|
|
"step": 7020,
|
|
"valid_targets_mean": 28943.9,
|
|
"valid_targets_min": 18428
|
|
},
|
|
{
|
|
"epoch": 7.497331910352187,
|
|
"grad_norm": 0.13471132033960173,
|
|
"learning_rate": 2.4060893674640815e-06,
|
|
"loss": 0.398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4004710912704468,
|
|
"step": 7025,
|
|
"valid_targets_mean": 28850.0,
|
|
"valid_targets_min": 22619
|
|
},
|
|
{
|
|
"epoch": 7.502668089647813,
|
|
"grad_norm": 0.12545470797822975,
|
|
"learning_rate": 2.3555829268657626e-06,
|
|
"loss": 0.398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4004118740558624,
|
|
"step": 7030,
|
|
"valid_targets_mean": 28785.9,
|
|
"valid_targets_min": 21242
|
|
},
|
|
{
|
|
"epoch": 7.508004268943436,
|
|
"grad_norm": 0.1309840290867499,
|
|
"learning_rate": 2.305605898901486e-06,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.399819016456604,
|
|
"step": 7035,
|
|
"valid_targets_mean": 29010.9,
|
|
"valid_targets_min": 16563
|
|
},
|
|
{
|
|
"epoch": 7.513340448239061,
|
|
"grad_norm": 0.12977304528913586,
|
|
"learning_rate": 2.25615855453879e-06,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39655691385269165,
|
|
"step": 7040,
|
|
"valid_targets_mean": 28872.3,
|
|
"valid_targets_min": 22548
|
|
},
|
|
{
|
|
"epoch": 7.518676627534685,
|
|
"grad_norm": 0.13233513610791645,
|
|
"learning_rate": 2.207241161873419e-06,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39584222435951233,
|
|
"step": 7045,
|
|
"valid_targets_mean": 28785.3,
|
|
"valid_targets_min": 22175
|
|
},
|
|
{
|
|
"epoch": 7.524012806830309,
|
|
"grad_norm": 0.1306727802585294,
|
|
"learning_rate": 2.15885398612774e-06,
|
|
"loss": 0.3943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39712512493133545,
|
|
"step": 7050,
|
|
"valid_targets_mean": 28708.5,
|
|
"valid_targets_min": 19500
|
|
},
|
|
{
|
|
"epoch": 7.5293489861259335,
|
|
"grad_norm": 0.14629483828482254,
|
|
"learning_rate": 2.110997289649397e-06,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39369213581085205,
|
|
"step": 7055,
|
|
"valid_targets_mean": 28865.4,
|
|
"valid_targets_min": 17438
|
|
},
|
|
{
|
|
"epoch": 7.534685165421558,
|
|
"grad_norm": 0.13031826983551667,
|
|
"learning_rate": 2.0636713319098444e-06,
|
|
"loss": 0.3978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39837566018104553,
|
|
"step": 7060,
|
|
"valid_targets_mean": 28751.2,
|
|
"valid_targets_min": 20910
|
|
},
|
|
{
|
|
"epoch": 7.540021344717182,
|
|
"grad_norm": 0.13010925145229557,
|
|
"learning_rate": 2.0168763695029735e-06,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39550474286079407,
|
|
"step": 7065,
|
|
"valid_targets_mean": 28783.4,
|
|
"valid_targets_min": 20371
|
|
},
|
|
{
|
|
"epoch": 7.545357524012807,
|
|
"grad_norm": 0.13859542215077358,
|
|
"learning_rate": 1.9706126561436775e-06,
|
|
"loss": 0.3993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3975289762020111,
|
|
"step": 7070,
|
|
"valid_targets_mean": 28944.4,
|
|
"valid_targets_min": 17638
|
|
},
|
|
{
|
|
"epoch": 7.550693703308431,
|
|
"grad_norm": 0.12883914997866086,
|
|
"learning_rate": 1.9248804426665323e-06,
|
|
"loss": 0.3963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.395477831363678,
|
|
"step": 7075,
|
|
"valid_targets_mean": 28890.7,
|
|
"valid_targets_min": 22324
|
|
},
|
|
{
|
|
"epoch": 7.556029882604055,
|
|
"grad_norm": 0.12680204603676157,
|
|
"learning_rate": 1.8796799770243845e-06,
|
|
"loss": 0.3963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39461207389831543,
|
|
"step": 7080,
|
|
"valid_targets_mean": 28871.8,
|
|
"valid_targets_min": 21378
|
|
},
|
|
{
|
|
"epoch": 7.56136606189968,
|
|
"grad_norm": 0.12948065308048,
|
|
"learning_rate": 1.835011504287032e-06,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3954583406448364,
|
|
"step": 7085,
|
|
"valid_targets_mean": 28856.8,
|
|
"valid_targets_min": 21181
|
|
},
|
|
{
|
|
"epoch": 7.566702241195304,
|
|
"grad_norm": 0.12563199927845528,
|
|
"learning_rate": 1.790875266639891e-06,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39542630314826965,
|
|
"step": 7090,
|
|
"valid_targets_mean": 28895.1,
|
|
"valid_targets_min": 18822
|
|
},
|
|
{
|
|
"epoch": 7.572038420490928,
|
|
"grad_norm": 0.13220859394541173,
|
|
"learning_rate": 1.7472715033826747e-06,
|
|
"loss": 0.396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39799848198890686,
|
|
"step": 7095,
|
|
"valid_targets_mean": 28812.4,
|
|
"valid_targets_min": 20946
|
|
},
|
|
{
|
|
"epoch": 7.577374599786553,
|
|
"grad_norm": 0.12830894957199165,
|
|
"learning_rate": 1.7042004509281284e-06,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3940737843513489,
|
|
"step": 7100,
|
|
"valid_targets_mean": 28974.1,
|
|
"valid_targets_min": 22172
|
|
},
|
|
{
|
|
"epoch": 7.582710779082177,
|
|
"grad_norm": 0.12304144890226744,
|
|
"learning_rate": 1.6616623428006961e-06,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40141934156417847,
|
|
"step": 7105,
|
|
"valid_targets_mean": 28829.5,
|
|
"valid_targets_min": 21348
|
|
},
|
|
{
|
|
"epoch": 7.588046958377801,
|
|
"grad_norm": 0.13685029611412847,
|
|
"learning_rate": 1.6196574096353e-06,
|
|
"loss": 0.397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3940128982067108,
|
|
"step": 7110,
|
|
"valid_targets_mean": 28937.2,
|
|
"valid_targets_min": 22069
|
|
},
|
|
{
|
|
"epoch": 7.593383137673426,
|
|
"grad_norm": 0.1320911496723597,
|
|
"learning_rate": 1.578185879176064e-06,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40384820103645325,
|
|
"step": 7115,
|
|
"valid_targets_mean": 28760.0,
|
|
"valid_targets_min": 21903
|
|
},
|
|
{
|
|
"epoch": 7.59871931696905,
|
|
"grad_norm": 0.12524210991399579,
|
|
"learning_rate": 1.5372479762750691e-06,
|
|
"loss": 0.3978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39434826374053955,
|
|
"step": 7120,
|
|
"valid_targets_mean": 28681.2,
|
|
"valid_targets_min": 21813
|
|
},
|
|
{
|
|
"epoch": 7.6040554962646745,
|
|
"grad_norm": 0.12842413602049793,
|
|
"learning_rate": 1.4968439228912e-06,
|
|
"loss": 0.3983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3997383415699005,
|
|
"step": 7125,
|
|
"valid_targets_mean": 28874.6,
|
|
"valid_targets_min": 22386
|
|
},
|
|
{
|
|
"epoch": 7.609391675560299,
|
|
"grad_norm": 0.13435131545885384,
|
|
"learning_rate": 1.4569739380888458e-06,
|
|
"loss": 0.3972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4004368484020233,
|
|
"step": 7130,
|
|
"valid_targets_mean": 28933.9,
|
|
"valid_targets_min": 19047
|
|
},
|
|
{
|
|
"epoch": 7.614727854855923,
|
|
"grad_norm": 0.1327719002764592,
|
|
"learning_rate": 1.4176382380367782e-06,
|
|
"loss": 0.3994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39994803071022034,
|
|
"step": 7135,
|
|
"valid_targets_mean": 28925.2,
|
|
"valid_targets_min": 21066
|
|
},
|
|
{
|
|
"epoch": 7.6200640341515475,
|
|
"grad_norm": 0.1299401408543429,
|
|
"learning_rate": 1.3788370360069636e-06,
|
|
"loss": 0.399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3979865312576294,
|
|
"step": 7140,
|
|
"valid_targets_mean": 28861.9,
|
|
"valid_targets_min": 23294
|
|
},
|
|
{
|
|
"epoch": 7.625400213447172,
|
|
"grad_norm": 0.15674138969926396,
|
|
"learning_rate": 1.3405705423734094e-06,
|
|
"loss": 0.396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3987683653831482,
|
|
"step": 7145,
|
|
"valid_targets_mean": 28867.2,
|
|
"valid_targets_min": 21800
|
|
},
|
|
{
|
|
"epoch": 7.630736392742796,
|
|
"grad_norm": 0.12468537664092988,
|
|
"learning_rate": 1.3028389646110084e-06,
|
|
"loss": 0.397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39801982045173645,
|
|
"step": 7150,
|
|
"valid_targets_mean": 28899.1,
|
|
"valid_targets_min": 19178
|
|
},
|
|
{
|
|
"epoch": 7.636072572038421,
|
|
"grad_norm": 0.12518680881130087,
|
|
"learning_rate": 1.2656425072944068e-06,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4001878499984741,
|
|
"step": 7155,
|
|
"valid_targets_mean": 28741.1,
|
|
"valid_targets_min": 20442
|
|
},
|
|
{
|
|
"epoch": 7.641408751334045,
|
|
"grad_norm": 0.13378082935458932,
|
|
"learning_rate": 1.228981372096949e-06,
|
|
"loss": 0.3954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3943834602832794,
|
|
"step": 7160,
|
|
"valid_targets_mean": 28739.6,
|
|
"valid_targets_min": 21336
|
|
},
|
|
{
|
|
"epoch": 7.646744930629669,
|
|
"grad_norm": 0.12480255974177112,
|
|
"learning_rate": 1.192855757789546e-06,
|
|
"loss": 0.396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39438241720199585,
|
|
"step": 7165,
|
|
"valid_targets_mean": 28988.1,
|
|
"valid_targets_min": 22430
|
|
},
|
|
{
|
|
"epoch": 7.652081109925294,
|
|
"grad_norm": 0.12332588429332686,
|
|
"learning_rate": 1.1572658602395426e-06,
|
|
"loss": 0.3987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40162521600723267,
|
|
"step": 7170,
|
|
"valid_targets_mean": 28919.3,
|
|
"valid_targets_min": 23571
|
|
},
|
|
{
|
|
"epoch": 7.657417289220918,
|
|
"grad_norm": 0.1271716835328731,
|
|
"learning_rate": 1.122211872409784e-06,
|
|
"loss": 0.3987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3963414430618286,
|
|
"step": 7175,
|
|
"valid_targets_mean": 28940.8,
|
|
"valid_targets_min": 22922
|
|
},
|
|
{
|
|
"epoch": 7.662753468516542,
|
|
"grad_norm": 0.13031537304464122,
|
|
"learning_rate": 1.087693984357452e-06,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39790183305740356,
|
|
"step": 7180,
|
|
"valid_targets_mean": 28825.0,
|
|
"valid_targets_min": 19505
|
|
},
|
|
{
|
|
"epoch": 7.668089647812167,
|
|
"grad_norm": 0.15005193867568728,
|
|
"learning_rate": 1.0537123832330964e-06,
|
|
"loss": 0.3976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39635568857192993,
|
|
"step": 7185,
|
|
"valid_targets_mean": 28871.7,
|
|
"valid_targets_min": 20854
|
|
},
|
|
{
|
|
"epoch": 7.673425827107791,
|
|
"grad_norm": 0.12521205270099878,
|
|
"learning_rate": 1.0202672532796055e-06,
|
|
"loss": 0.3977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.397771418094635,
|
|
"step": 7190,
|
|
"valid_targets_mean": 28996.4,
|
|
"valid_targets_min": 18700
|
|
},
|
|
{
|
|
"epoch": 7.678762006403415,
|
|
"grad_norm": 0.1263847601133633,
|
|
"learning_rate": 9.87358775831182e-07,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39790722727775574,
|
|
"step": 7195,
|
|
"valid_targets_mean": 28948.4,
|
|
"valid_targets_min": 22960
|
|
},
|
|
{
|
|
"epoch": 7.68409818569904,
|
|
"grad_norm": 0.14131809458762648,
|
|
"learning_rate": 9.549871293124234e-07,
|
|
"loss": 0.397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3980499804019928,
|
|
"step": 7200,
|
|
"valid_targets_mean": 28758.7,
|
|
"valid_targets_min": 20945
|
|
},
|
|
{
|
|
"epoch": 7.689434364994664,
|
|
"grad_norm": 0.1253039153817784,
|
|
"learning_rate": 9.231524892372889e-07,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3978698253631592,
|
|
"step": 7205,
|
|
"valid_targets_mean": 28422.1,
|
|
"valid_targets_min": 17803
|
|
},
|
|
{
|
|
"epoch": 7.6947705442902885,
|
|
"grad_norm": 0.12140794592880637,
|
|
"learning_rate": 8.918550282081772e-07,
|
|
"loss": 0.399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39450564980506897,
|
|
"step": 7210,
|
|
"valid_targets_mean": 28683.9,
|
|
"valid_targets_min": 20870
|
|
},
|
|
{
|
|
"epoch": 7.700106723585913,
|
|
"grad_norm": 0.12116805708832058,
|
|
"learning_rate": 8.610949159149728e-07,
|
|
"loss": 0.3961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39463678002357483,
|
|
"step": 7215,
|
|
"valid_targets_mean": 28825.8,
|
|
"valid_targets_min": 19874
|
|
},
|
|
{
|
|
"epoch": 7.705442902881536,
|
|
"grad_norm": 0.12281107442757595,
|
|
"learning_rate": 8.308723191341683e-07,
|
|
"loss": 0.3967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39416587352752686,
|
|
"step": 7220,
|
|
"valid_targets_mean": 28682.7,
|
|
"valid_targets_min": 18031
|
|
},
|
|
{
|
|
"epoch": 7.710779082177162,
|
|
"grad_norm": 0.14860605326149026,
|
|
"learning_rate": 8.011874017279208e-07,
|
|
"loss": 0.3972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3981039822101593,
|
|
"step": 7225,
|
|
"valid_targets_mean": 28751.4,
|
|
"valid_targets_min": 21110
|
|
},
|
|
{
|
|
"epoch": 7.716115261472785,
|
|
"grad_norm": 0.12723855024397449,
|
|
"learning_rate": 7.72040324643164e-07,
|
|
"loss": 0.3977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39874446392059326,
|
|
"step": 7230,
|
|
"valid_targets_mean": 28755.7,
|
|
"valid_targets_min": 22233
|
|
},
|
|
{
|
|
"epoch": 7.72145144076841,
|
|
"grad_norm": 0.11917391741979977,
|
|
"learning_rate": 7.434312459107639e-07,
|
|
"loss": 0.3994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40069904923439026,
|
|
"step": 7235,
|
|
"valid_targets_mean": 28949.6,
|
|
"valid_targets_min": 20294
|
|
},
|
|
{
|
|
"epoch": 7.726787620064034,
|
|
"grad_norm": 0.12341547556245404,
|
|
"learning_rate": 7.153603206446203e-07,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39813292026519775,
|
|
"step": 7240,
|
|
"valid_targets_mean": 28706.5,
|
|
"valid_targets_min": 16608
|
|
},
|
|
{
|
|
"epoch": 7.732123799359658,
|
|
"grad_norm": 0.14878463296184147,
|
|
"learning_rate": 6.878277010408774e-07,
|
|
"loss": 0.3963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39200353622436523,
|
|
"step": 7245,
|
|
"valid_targets_mean": 28828.8,
|
|
"valid_targets_min": 21907
|
|
},
|
|
{
|
|
"epoch": 7.7374599786552825,
|
|
"grad_norm": 0.1221577876639434,
|
|
"learning_rate": 6.60833536377059e-07,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4019637107849121,
|
|
"step": 7250,
|
|
"valid_targets_mean": 28878.7,
|
|
"valid_targets_min": 18154
|
|
},
|
|
{
|
|
"epoch": 7.742796157950907,
|
|
"grad_norm": 0.12393055640085526,
|
|
"learning_rate": 6.343779730112464e-07,
|
|
"loss": 0.3973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3989558815956116,
|
|
"step": 7255,
|
|
"valid_targets_mean": 28855.9,
|
|
"valid_targets_min": 23389
|
|
},
|
|
{
|
|
"epoch": 7.748132337246531,
|
|
"grad_norm": 0.12330587831168091,
|
|
"learning_rate": 6.084611543813679e-07,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40006476640701294,
|
|
"step": 7260,
|
|
"valid_targets_mean": 28777.5,
|
|
"valid_targets_min": 19307
|
|
},
|
|
{
|
|
"epoch": 7.7534685165421555,
|
|
"grad_norm": 0.12246315853862788,
|
|
"learning_rate": 5.830832210042991e-07,
|
|
"loss": 0.396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3977513909339905,
|
|
"step": 7265,
|
|
"valid_targets_mean": 28741.4,
|
|
"valid_targets_min": 22047
|
|
},
|
|
{
|
|
"epoch": 7.75880469583778,
|
|
"grad_norm": 0.1247950006517304,
|
|
"learning_rate": 5.582443104752089e-07,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3957824409008026,
|
|
"step": 7270,
|
|
"valid_targets_mean": 28763.2,
|
|
"valid_targets_min": 18758
|
|
},
|
|
{
|
|
"epoch": 7.764140875133404,
|
|
"grad_norm": 0.12264806306956744,
|
|
"learning_rate": 5.339445574667479e-07,
|
|
"loss": 0.3981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39586859941482544,
|
|
"step": 7275,
|
|
"valid_targets_mean": 28837.8,
|
|
"valid_targets_min": 15054
|
|
},
|
|
{
|
|
"epoch": 7.769477054429029,
|
|
"grad_norm": 0.14603501645455727,
|
|
"learning_rate": 5.101840937283498e-07,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3978617787361145,
|
|
"step": 7280,
|
|
"valid_targets_mean": 28967.5,
|
|
"valid_targets_min": 18255
|
|
},
|
|
{
|
|
"epoch": 7.774813233724653,
|
|
"grad_norm": 0.12172511207399195,
|
|
"learning_rate": 4.869630480854981e-07,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3975546658039093,
|
|
"step": 7285,
|
|
"valid_targets_mean": 29052.4,
|
|
"valid_targets_min": 21218
|
|
},
|
|
{
|
|
"epoch": 7.780149413020277,
|
|
"grad_norm": 0.12261141364651407,
|
|
"learning_rate": 4.6428154643904933e-07,
|
|
"loss": 0.399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39828208088874817,
|
|
"step": 7290,
|
|
"valid_targets_mean": 28853.3,
|
|
"valid_targets_min": 20879
|
|
},
|
|
{
|
|
"epoch": 7.785485592315902,
|
|
"grad_norm": 0.11811955548665705,
|
|
"learning_rate": 4.4213971176451095e-07,
|
|
"loss": 0.3938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3930383324623108,
|
|
"step": 7295,
|
|
"valid_targets_mean": 28826.9,
|
|
"valid_targets_min": 21494
|
|
},
|
|
{
|
|
"epoch": 7.790821771611526,
|
|
"grad_norm": 0.12841526707703033,
|
|
"learning_rate": 4.2053766411144225e-07,
|
|
"loss": 0.3985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39577236771583557,
|
|
"step": 7300,
|
|
"valid_targets_mean": 28826.0,
|
|
"valid_targets_min": 22310
|
|
},
|
|
{
|
|
"epoch": 7.79615795090715,
|
|
"grad_norm": 0.12011802895684066,
|
|
"learning_rate": 3.994755206027212e-07,
|
|
"loss": 0.3956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39808082580566406,
|
|
"step": 7305,
|
|
"valid_targets_mean": 28803.4,
|
|
"valid_targets_min": 21407
|
|
},
|
|
{
|
|
"epoch": 7.801494130202775,
|
|
"grad_norm": 0.12010863458140218,
|
|
"learning_rate": 3.789533954339897e-07,
|
|
"loss": 0.3983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3978669047355652,
|
|
"step": 7310,
|
|
"valid_targets_mean": 28767.3,
|
|
"valid_targets_min": 19461
|
|
},
|
|
{
|
|
"epoch": 7.806830309498399,
|
|
"grad_norm": 0.12106939890963439,
|
|
"learning_rate": 3.589713998729427e-07,
|
|
"loss": 0.397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3986453115940094,
|
|
"step": 7315,
|
|
"valid_targets_mean": 28953.8,
|
|
"valid_targets_min": 23091
|
|
},
|
|
{
|
|
"epoch": 7.812166488794023,
|
|
"grad_norm": 0.14095560238184315,
|
|
"learning_rate": 3.395296422588512e-07,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39821505546569824,
|
|
"step": 7320,
|
|
"valid_targets_mean": 28776.2,
|
|
"valid_targets_min": 20422
|
|
},
|
|
{
|
|
"epoch": 7.817502668089648,
|
|
"grad_norm": 0.1215992441103773,
|
|
"learning_rate": 3.20628228001818e-07,
|
|
"loss": 0.3974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4034339189529419,
|
|
"step": 7325,
|
|
"valid_targets_mean": 28778.5,
|
|
"valid_targets_min": 18860
|
|
},
|
|
{
|
|
"epoch": 7.822838847385272,
|
|
"grad_norm": 0.12213739439146389,
|
|
"learning_rate": 3.022672595823672e-07,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3980867266654968,
|
|
"step": 7330,
|
|
"valid_targets_mean": 28801.7,
|
|
"valid_targets_min": 20663
|
|
},
|
|
{
|
|
"epoch": 7.8281750266808965,
|
|
"grad_norm": 0.11906318514646144,
|
|
"learning_rate": 2.844468365507447e-07,
|
|
"loss": 0.3978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39695197343826294,
|
|
"step": 7335,
|
|
"valid_targets_mean": 28957.1,
|
|
"valid_targets_min": 21597
|
|
},
|
|
{
|
|
"epoch": 7.833511205976521,
|
|
"grad_norm": 0.11748452176757196,
|
|
"learning_rate": 2.6716705552649603e-07,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3960111141204834,
|
|
"step": 7340,
|
|
"valid_targets_mean": 28855.7,
|
|
"valid_targets_min": 19780
|
|
},
|
|
{
|
|
"epoch": 7.838847385272145,
|
|
"grad_norm": 0.13422349699284086,
|
|
"learning_rate": 2.5042801019785624e-07,
|
|
"loss": 0.398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3954845368862152,
|
|
"step": 7345,
|
|
"valid_targets_mean": 28988.1,
|
|
"valid_targets_min": 22849
|
|
},
|
|
{
|
|
"epoch": 7.8441835645677696,
|
|
"grad_norm": 0.12139430060409888,
|
|
"learning_rate": 2.342297913212943e-07,
|
|
"loss": 0.3957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39191263914108276,
|
|
"step": 7350,
|
|
"valid_targets_mean": 28993.3,
|
|
"valid_targets_min": 23462
|
|
},
|
|
{
|
|
"epoch": 7.849519743863394,
|
|
"grad_norm": 0.12127261635607517,
|
|
"learning_rate": 2.1857248672100262e-07,
|
|
"loss": 0.395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39600613713264465,
|
|
"step": 7355,
|
|
"valid_targets_mean": 28810.9,
|
|
"valid_targets_min": 22131
|
|
},
|
|
{
|
|
"epoch": 7.854855923159018,
|
|
"grad_norm": 0.11960436598459143,
|
|
"learning_rate": 2.0345618128840838e-07,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39346587657928467,
|
|
"step": 7360,
|
|
"valid_targets_mean": 28869.0,
|
|
"valid_targets_min": 21748
|
|
},
|
|
{
|
|
"epoch": 7.860192102454643,
|
|
"grad_norm": 0.11716437914975067,
|
|
"learning_rate": 1.888809569817518e-07,
|
|
"loss": 0.3976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39495378732681274,
|
|
"step": 7365,
|
|
"valid_targets_mean": 28888.0,
|
|
"valid_targets_min": 22788
|
|
},
|
|
{
|
|
"epoch": 7.865528281750267,
|
|
"grad_norm": 0.12118563526720992,
|
|
"learning_rate": 1.7484689282558643e-07,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39744696021080017,
|
|
"step": 7370,
|
|
"valid_targets_mean": 28844.5,
|
|
"valid_targets_min": 20332
|
|
},
|
|
{
|
|
"epoch": 7.870864461045891,
|
|
"grad_norm": 0.1203814795413713,
|
|
"learning_rate": 1.6135406491041282e-07,
|
|
"loss": 0.3973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3969980776309967,
|
|
"step": 7375,
|
|
"valid_targets_mean": 28854.4,
|
|
"valid_targets_min": 20845
|
|
},
|
|
{
|
|
"epoch": 7.876200640341516,
|
|
"grad_norm": 0.11945569838464438,
|
|
"learning_rate": 1.484025463921901e-07,
|
|
"loss": 0.397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3918302059173584,
|
|
"step": 7380,
|
|
"valid_targets_mean": 28731.7,
|
|
"valid_targets_min": 18884
|
|
},
|
|
{
|
|
"epoch": 7.88153681963714,
|
|
"grad_norm": 0.11756182206253876,
|
|
"learning_rate": 1.359924074920249e-07,
|
|
"loss": 0.3991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40109801292419434,
|
|
"step": 7385,
|
|
"valid_targets_mean": 28831.0,
|
|
"valid_targets_min": 19023
|
|
},
|
|
{
|
|
"epoch": 7.886872998932764,
|
|
"grad_norm": 0.1340519609225345,
|
|
"learning_rate": 1.2412371549573864e-07,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39418697357177734,
|
|
"step": 7390,
|
|
"valid_targets_mean": 28842.2,
|
|
"valid_targets_min": 21875
|
|
},
|
|
{
|
|
"epoch": 7.892209178228389,
|
|
"grad_norm": 0.11587335971582481,
|
|
"learning_rate": 1.1279653475350094e-07,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39782729744911194,
|
|
"step": 7395,
|
|
"valid_targets_mean": 28892.1,
|
|
"valid_targets_min": 19641
|
|
},
|
|
{
|
|
"epoch": 7.897545357524013,
|
|
"grad_norm": 0.11750288132031445,
|
|
"learning_rate": 1.0201092667950773e-07,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3983398675918579,
|
|
"step": 7400,
|
|
"valid_targets_mean": 28828.2,
|
|
"valid_targets_min": 21791
|
|
},
|
|
{
|
|
"epoch": 7.9028815368196375,
|
|
"grad_norm": 0.13435898758171472,
|
|
"learning_rate": 9.176694975161493e-08,
|
|
"loss": 0.3983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39778202772140503,
|
|
"step": 7405,
|
|
"valid_targets_mean": 29013.8,
|
|
"valid_targets_min": 20210
|
|
},
|
|
{
|
|
"epoch": 7.908217716115262,
|
|
"grad_norm": 0.12001733613834027,
|
|
"learning_rate": 8.206465951104969e-08,
|
|
"loss": 0.3958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.392083078622818,
|
|
"step": 7410,
|
|
"valid_targets_mean": 28912.8,
|
|
"valid_targets_min": 20190
|
|
},
|
|
{
|
|
"epoch": 7.913553895410886,
|
|
"grad_norm": 0.11925283075717091,
|
|
"learning_rate": 7.290410856211071e-08,
|
|
"loss": 0.3958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3957703411579132,
|
|
"step": 7415,
|
|
"valid_targets_mean": 28886.9,
|
|
"valid_targets_min": 22025
|
|
},
|
|
{
|
|
"epoch": 7.9188900747065105,
|
|
"grad_norm": 0.14003332102035307,
|
|
"learning_rate": 6.428534657185736e-08,
|
|
"loss": 0.397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39609062671661377,
|
|
"step": 7420,
|
|
"valid_targets_mean": 28744.4,
|
|
"valid_targets_min": 19828
|
|
},
|
|
{
|
|
"epoch": 7.924226254002134,
|
|
"grad_norm": 0.11718337201752242,
|
|
"learning_rate": 5.620842026985429e-08,
|
|
"loss": 0.3954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3998251259326935,
|
|
"step": 7425,
|
|
"valid_targets_mean": 28700.6,
|
|
"valid_targets_min": 16080
|
|
},
|
|
{
|
|
"epoch": 7.929562433297759,
|
|
"grad_norm": 0.1408268207394538,
|
|
"learning_rate": 4.8673373447916116e-08,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39620524644851685,
|
|
"step": 7430,
|
|
"valid_targets_mean": 28704.3,
|
|
"valid_targets_min": 21952
|
|
},
|
|
{
|
|
"epoch": 7.934898612593383,
|
|
"grad_norm": 0.12045696435740527,
|
|
"learning_rate": 4.1680246959896473e-08,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3986198604106903,
|
|
"step": 7435,
|
|
"valid_targets_mean": 28944.1,
|
|
"valid_targets_min": 18722
|
|
},
|
|
{
|
|
"epoch": 7.940234791889008,
|
|
"grad_norm": 0.12033025936809216,
|
|
"learning_rate": 3.522907872141046e-08,
|
|
"loss": 0.3968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3966403603553772,
|
|
"step": 7440,
|
|
"valid_targets_mean": 28832.9,
|
|
"valid_targets_min": 18828
|
|
},
|
|
{
|
|
"epoch": 7.945570971184631,
|
|
"grad_norm": 0.1386418674094808,
|
|
"learning_rate": 2.9319903709679186e-08,
|
|
"loss": 0.3956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.396666944026947,
|
|
"step": 7445,
|
|
"valid_targets_mean": 28944.9,
|
|
"valid_targets_min": 21863
|
|
},
|
|
{
|
|
"epoch": 7.950907150480256,
|
|
"grad_norm": 0.11831593016496478,
|
|
"learning_rate": 2.3952753963318865e-08,
|
|
"loss": 0.3956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3972724974155426,
|
|
"step": 7450,
|
|
"valid_targets_mean": 29041.1,
|
|
"valid_targets_min": 20975
|
|
},
|
|
{
|
|
"epoch": 7.95624332977588,
|
|
"grad_norm": 0.11586980929359303,
|
|
"learning_rate": 1.9127658582163145e-08,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3945784270763397,
|
|
"step": 7455,
|
|
"valid_targets_mean": 28976.9,
|
|
"valid_targets_min": 18373
|
|
},
|
|
{
|
|
"epoch": 7.9615795090715045,
|
|
"grad_norm": 0.12002426358472632,
|
|
"learning_rate": 1.4844643727129903e-08,
|
|
"loss": 0.3969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.394758939743042,
|
|
"step": 7460,
|
|
"valid_targets_mean": 28830.2,
|
|
"valid_targets_min": 21402
|
|
},
|
|
{
|
|
"epoch": 7.966915688367129,
|
|
"grad_norm": 0.11820445457921111,
|
|
"learning_rate": 1.1103732620043606e-08,
|
|
"loss": 0.3978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3968362808227539,
|
|
"step": 7465,
|
|
"valid_targets_mean": 28914.0,
|
|
"valid_targets_min": 19185
|
|
},
|
|
{
|
|
"epoch": 7.972251867662753,
|
|
"grad_norm": 0.12162588336594275,
|
|
"learning_rate": 7.90494554353538e-09,
|
|
"loss": 0.3963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3936949670314789,
|
|
"step": 7470,
|
|
"valid_targets_mean": 28676.7,
|
|
"valid_targets_min": 18647
|
|
},
|
|
{
|
|
"epoch": 7.9775880469583775,
|
|
"grad_norm": 0.11925013033412035,
|
|
"learning_rate": 5.248299840920901e-09,
|
|
"loss": 0.3993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3953138589859009,
|
|
"step": 7475,
|
|
"valid_targets_mean": 28855.8,
|
|
"valid_targets_min": 18558
|
|
},
|
|
{
|
|
"epoch": 7.982924226254002,
|
|
"grad_norm": 0.12052536553549298,
|
|
"learning_rate": 3.1338099161226653e-09,
|
|
"loss": 0.3982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40008604526519775,
|
|
"step": 7480,
|
|
"valid_targets_mean": 28778.8,
|
|
"valid_targets_min": 19474
|
|
},
|
|
{
|
|
"epoch": 7.988260405549626,
|
|
"grad_norm": 0.11952974486435448,
|
|
"learning_rate": 1.5614872335589781e-09,
|
|
"loss": 0.3965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40297284722328186,
|
|
"step": 7485,
|
|
"valid_targets_mean": 28927.8,
|
|
"valid_targets_min": 22890
|
|
},
|
|
{
|
|
"epoch": 7.993596584845251,
|
|
"grad_norm": 0.12460476994872255,
|
|
"learning_rate": 5.313403181328447e-10,
|
|
"loss": 0.3989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3980481028556824,
|
|
"step": 7490,
|
|
"valid_targets_mean": 28616.4,
|
|
"valid_targets_min": 19327
|
|
},
|
|
{
|
|
"epoch": 7.998932764140875,
|
|
"grad_norm": 0.11985334396945319,
|
|
"learning_rate": 4.3374755120950685e-11,
|
|
"loss": 0.3961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3979700207710266,
|
|
"step": 7495,
|
|
"valid_targets_mean": 28892.5,
|
|
"valid_targets_min": 21667
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4003600478172302,
|
|
"step": 7496,
|
|
"total_flos": 4.678153526260531e+16,
|
|
"train_loss": 0.13369933033527026,
|
|
"train_runtime": 19962.4709,
|
|
"train_samples_per_second": 384.408,
|
|
"train_steps_per_second": 0.376,
|
|
"valid_targets_mean": 28802.2,
|
|
"valid_targets_min": 22042
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 7496,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 8,
|
|
"save_steps": 100,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 4.678153526260531e+16,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|