873 lines
24 KiB
JSON
873 lines
24 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 6.0,
|
|
"eval_steps": 500,
|
|
"global_step": 378,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.07936507936507936,
|
|
"grad_norm": 18.09902303910361,
|
|
"learning_rate": 4.210526315789474e-06,
|
|
"loss": 1.0034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.996525764465332,
|
|
"step": 5,
|
|
"valid_targets_mean": 1317.2,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 0.15873015873015872,
|
|
"grad_norm": 8.16991336123599,
|
|
"learning_rate": 9.473684210526315e-06,
|
|
"loss": 0.8848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7810754776000977,
|
|
"step": 10,
|
|
"valid_targets_mean": 1280.9,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 0.23809523809523808,
|
|
"grad_norm": 3.1633278706676378,
|
|
"learning_rate": 1.4736842105263159e-05,
|
|
"loss": 0.6728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6020610332489014,
|
|
"step": 15,
|
|
"valid_targets_mean": 1262.5,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 0.31746031746031744,
|
|
"grad_norm": 2.144961671140719,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.5289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5145202875137329,
|
|
"step": 20,
|
|
"valid_targets_mean": 1289.9,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 0.3968253968253968,
|
|
"grad_norm": 1.5113256761550613,
|
|
"learning_rate": 2.526315789473684e-05,
|
|
"loss": 0.4561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43933457136154175,
|
|
"step": 25,
|
|
"valid_targets_mean": 1349.3,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 0.47619047619047616,
|
|
"grad_norm": 1.4341124853993426,
|
|
"learning_rate": 3.052631578947369e-05,
|
|
"loss": 0.3862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36623549461364746,
|
|
"step": 30,
|
|
"valid_targets_mean": 1292.4,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 0.5555555555555556,
|
|
"grad_norm": 1.1254468593395395,
|
|
"learning_rate": 3.578947368421053e-05,
|
|
"loss": 0.352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32470637559890747,
|
|
"step": 35,
|
|
"valid_targets_mean": 1341.6,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 0.6349206349206349,
|
|
"grad_norm": 1.1077925223265233,
|
|
"learning_rate": 3.999914623406736e-05,
|
|
"loss": 0.3518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3396565914154053,
|
|
"step": 40,
|
|
"valid_targets_mean": 1224.6,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 0.7142857142857143,
|
|
"grad_norm": 1.0453242597684407,
|
|
"learning_rate": 3.9969272079348685e-05,
|
|
"loss": 0.3189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3043442964553833,
|
|
"step": 45,
|
|
"valid_targets_mean": 1244.7,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 0.7936507936507936,
|
|
"grad_norm": 0.9888625604591795,
|
|
"learning_rate": 3.989678249165612e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29527759552001953,
|
|
"step": 50,
|
|
"valid_targets_mean": 1431.3,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 0.873015873015873,
|
|
"grad_norm": 0.9865480113822774,
|
|
"learning_rate": 3.9781832167422926e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3123767673969269,
|
|
"step": 55,
|
|
"valid_targets_mean": 1331.8,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"grad_norm": 0.9489794948782682,
|
|
"learning_rate": 3.962466641643398e-05,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31309300661087036,
|
|
"step": 60,
|
|
"valid_targets_mean": 1493.4,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 1.0317460317460316,
|
|
"grad_norm": 1.1071899523713842,
|
|
"learning_rate": 3.942562063832228e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2574073374271393,
|
|
"step": 65,
|
|
"valid_targets_mean": 1164.1,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 1.1111111111111112,
|
|
"grad_norm": 1.1419435529606288,
|
|
"learning_rate": 3.9185119606809305e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24323183298110962,
|
|
"step": 70,
|
|
"valid_targets_mean": 1309.0,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 1.1904761904761905,
|
|
"grad_norm": 0.8396527150729525,
|
|
"learning_rate": 3.89036765632164e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24688035249710083,
|
|
"step": 75,
|
|
"valid_targets_mean": 1233.3,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 1.2698412698412698,
|
|
"grad_norm": 1.031066680643171,
|
|
"learning_rate": 3.8581892121181984e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587030231952667,
|
|
"step": 80,
|
|
"valid_targets_mean": 1194.1,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 1.3492063492063493,
|
|
"grad_norm": 1.0087562996042767,
|
|
"learning_rate": 3.822045298492177e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26486936211586,
|
|
"step": 85,
|
|
"valid_targets_mean": 1248.9,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 1.4285714285714286,
|
|
"grad_norm": 0.8956917783711329,
|
|
"learning_rate": 3.782013048376736e-05,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26159846782684326,
|
|
"step": 90,
|
|
"valid_targets_mean": 1584.2,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 1.507936507936508,
|
|
"grad_norm": 1.0889510509050193,
|
|
"learning_rate": 3.738177892611057e-05,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263064444065094,
|
|
"step": 95,
|
|
"valid_targets_mean": 1269.8,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 1.5873015873015874,
|
|
"grad_norm": 0.9208538453912383,
|
|
"learning_rate": 3.690633377626628e-05,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2552173435688019,
|
|
"step": 100,
|
|
"valid_targets_mean": 1260.8,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 1.0431803629591703,
|
|
"learning_rate": 3.639480965814443e-05,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2334054708480835,
|
|
"step": 105,
|
|
"valid_targets_mean": 1119.9,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 1.746031746031746,
|
|
"grad_norm": 1.052527932597642,
|
|
"learning_rate": 3.584829818999148e-05,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26042911410331726,
|
|
"step": 110,
|
|
"valid_targets_mean": 1358.5,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 1.8253968253968254,
|
|
"grad_norm": 1.2050528658411404,
|
|
"learning_rate": 3.526796565482206e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24487106502056122,
|
|
"step": 115,
|
|
"valid_targets_mean": 1421.6,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 1.9047619047619047,
|
|
"grad_norm": 0.9072467275412588,
|
|
"learning_rate": 3.4655050511512236e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25254106521606445,
|
|
"step": 120,
|
|
"valid_targets_mean": 1449.0,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 1.9841269841269842,
|
|
"grad_norm": 1.0303768935216235,
|
|
"learning_rate": 3.401086075186582e-05,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24224883317947388,
|
|
"step": 125,
|
|
"valid_targets_mean": 1346.0,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 2.0634920634920633,
|
|
"grad_norm": 1.0599334863834182,
|
|
"learning_rate": 3.333677110929403e-05,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22304898500442505,
|
|
"step": 130,
|
|
"valid_targets_mean": 1223.6,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 2.142857142857143,
|
|
"grad_norm": 0.9942911032267476,
|
|
"learning_rate": 3.263422012506502e-05,
|
|
"loss": 0.2136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23300984501838684,
|
|
"step": 135,
|
|
"valid_targets_mean": 1373.8,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 2.2222222222222223,
|
|
"grad_norm": 0.9867493029938139,
|
|
"learning_rate": 3.190470707838438e-05,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1962147355079651,
|
|
"step": 140,
|
|
"valid_targets_mean": 1268.1,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 2.3015873015873014,
|
|
"grad_norm": 0.9683968495450838,
|
|
"learning_rate": 3.114978878685771e-05,
|
|
"loss": 0.215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22331558167934418,
|
|
"step": 145,
|
|
"valid_targets_mean": 1451.9,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 2.380952380952381,
|
|
"grad_norm": 3.9084274034442066,
|
|
"learning_rate": 3.0371076284163442e-05,
|
|
"loss": 0.2155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22763928771018982,
|
|
"step": 150,
|
|
"valid_targets_mean": 1204.6,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 2.4603174603174605,
|
|
"grad_norm": 1.0158116125994063,
|
|
"learning_rate": 2.9570231382025732e-05,
|
|
"loss": 0.2038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18950828909873962,
|
|
"step": 155,
|
|
"valid_targets_mean": 1317.1,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 2.5396825396825395,
|
|
"grad_norm": 0.896681347419589,
|
|
"learning_rate": 2.8748963123824532e-05,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22112029790878296,
|
|
"step": 160,
|
|
"valid_targets_mean": 1424.4,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 2.619047619047619,
|
|
"grad_norm": 1.0606529423953202,
|
|
"learning_rate": 2.790902413741085e-05,
|
|
"loss": 0.2133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2376096546649933,
|
|
"step": 165,
|
|
"valid_targets_mean": 1399.1,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 2.6984126984126986,
|
|
"grad_norm": 0.9396280814023571,
|
|
"learning_rate": 2.7052206894910653e-05,
|
|
"loss": 0.2056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2136600762605667,
|
|
"step": 170,
|
|
"valid_targets_mean": 1357.7,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 2.7777777777777777,
|
|
"grad_norm": 1.083930671175511,
|
|
"learning_rate": 2.618033988749895e-05,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19602251052856445,
|
|
"step": 175,
|
|
"valid_targets_mean": 1215.0,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.979793298540035,
|
|
"learning_rate": 2.5295283723307517e-05,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2314784824848175,
|
|
"step": 180,
|
|
"valid_targets_mean": 1266.5,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 2.9365079365079367,
|
|
"grad_norm": 1.013880387387088,
|
|
"learning_rate": 2.4398927156793376e-05,
|
|
"loss": 0.2041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20053911209106445,
|
|
"step": 185,
|
|
"valid_targets_mean": 1415.3,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 3.015873015873016,
|
|
"grad_norm": 0.9506724254980058,
|
|
"learning_rate": 2.3493183058041578e-05,
|
|
"loss": 0.2014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18665489554405212,
|
|
"step": 190,
|
|
"valid_targets_mean": 1395.1,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 3.0952380952380953,
|
|
"grad_norm": 1.2045650788528535,
|
|
"learning_rate": 2.257998433060407e-05,
|
|
"loss": 0.1842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1960422694683075,
|
|
"step": 195,
|
|
"valid_targets_mean": 1277.2,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 3.1746031746031744,
|
|
"grad_norm": 0.9904415244786193,
|
|
"learning_rate": 2.166127978658608e-05,
|
|
"loss": 0.185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18444815278053284,
|
|
"step": 200,
|
|
"valid_targets_mean": 1417.6,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 3.253968253968254,
|
|
"grad_norm": 0.9573080350285159,
|
|
"learning_rate": 2.0739029987782903e-05,
|
|
"loss": 0.186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18823915719985962,
|
|
"step": 205,
|
|
"valid_targets_mean": 1211.6,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.8982800713768102,
|
|
"learning_rate": 1.9815203061742188e-05,
|
|
"loss": 0.1815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1576843112707138,
|
|
"step": 210,
|
|
"valid_targets_mean": 1294.8,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 3.4126984126984126,
|
|
"grad_norm": 0.9855158183383274,
|
|
"learning_rate": 1.8891770501680602e-05,
|
|
"loss": 0.1815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18304908275604248,
|
|
"step": 215,
|
|
"valid_targets_mean": 1243.9,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 3.492063492063492,
|
|
"grad_norm": 1.0040666267759992,
|
|
"learning_rate": 1.7970702959217944e-05,
|
|
"loss": 0.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1883706897497177,
|
|
"step": 220,
|
|
"valid_targets_mean": 1353.8,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 1.4558785666136118,
|
|
"learning_rate": 1.705396603890725e-05,
|
|
"loss": 0.1801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20024870336055756,
|
|
"step": 225,
|
|
"valid_targets_mean": 1336.3,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 3.6507936507936507,
|
|
"grad_norm": 1.0249713499978004,
|
|
"learning_rate": 1.6143516103535666e-05,
|
|
"loss": 0.1845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17252278327941895,
|
|
"step": 230,
|
|
"valid_targets_mean": 1262.4,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 3.7301587301587302,
|
|
"grad_norm": 0.9876581877640742,
|
|
"learning_rate": 1.524129609914763e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18393632769584656,
|
|
"step": 235,
|
|
"valid_targets_mean": 1363.2,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 3.8095238095238093,
|
|
"grad_norm": 0.951472253586105,
|
|
"learning_rate": 1.43492314087001e-05,
|
|
"loss": 0.1815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19082951545715332,
|
|
"step": 240,
|
|
"valid_targets_mean": 1442.2,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 3.888888888888889,
|
|
"grad_norm": 1.1055947291031696,
|
|
"learning_rate": 1.3469225743198337e-05,
|
|
"loss": 0.1893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18884754180908203,
|
|
"step": 245,
|
|
"valid_targets_mean": 1243.9,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 3.9682539682539684,
|
|
"grad_norm": 0.9454028728577349,
|
|
"learning_rate": 1.260315707908062e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19582678377628326,
|
|
"step": 250,
|
|
"valid_targets_mean": 1453.9,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 4.0476190476190474,
|
|
"grad_norm": 0.9615770906418738,
|
|
"learning_rate": 1.1752873650521934e-05,
|
|
"loss": 0.1684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1702508181333542,
|
|
"step": 255,
|
|
"valid_targets_mean": 1367.9,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 4.1269841269841265,
|
|
"grad_norm": 1.0494388437222053,
|
|
"learning_rate": 1.0920190005209066e-05,
|
|
"loss": 0.1671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1574375033378601,
|
|
"step": 260,
|
|
"valid_targets_mean": 1268.7,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 4.2063492063492065,
|
|
"grad_norm": 1.058177748509928,
|
|
"learning_rate": 1.0106883132004428e-05,
|
|
"loss": 0.1615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15336745977401733,
|
|
"step": 265,
|
|
"valid_targets_mean": 1281.3,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 1.1128115262277665,
|
|
"learning_rate": 9.314688668762232e-06,
|
|
"loss": 0.1555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15343694388866425,
|
|
"step": 270,
|
|
"valid_targets_mean": 1171.1,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 4.365079365079365,
|
|
"grad_norm": 0.9068931883638662,
|
|
"learning_rate": 8.545297198389896e-06,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1522439420223236,
|
|
"step": 275,
|
|
"valid_targets_mean": 1484.4,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 4.444444444444445,
|
|
"grad_norm": 0.987267277849222,
|
|
"learning_rate": 7.800350641058867e-06,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17415179312229156,
|
|
"step": 280,
|
|
"valid_targets_mean": 1327.5,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 4.523809523809524,
|
|
"grad_norm": 0.9406767373189786,
|
|
"learning_rate": 7.081438750264258e-06,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1646547019481659,
|
|
"step": 285,
|
|
"valid_targets_mean": 1483.0,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 4.603174603174603,
|
|
"grad_norm": 1.1253357217876951,
|
|
"learning_rate": 6.3900957202107695e-06,
|
|
"loss": 0.1564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1593586802482605,
|
|
"step": 290,
|
|
"valid_targets_mean": 1123.4,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 4.682539682539683,
|
|
"grad_norm": 1.0811536769207744,
|
|
"learning_rate": 5.727796911764955e-06,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17252852022647858,
|
|
"step": 295,
|
|
"valid_targets_mean": 1290.1,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 4.761904761904762,
|
|
"grad_norm": 1.0374368953244144,
|
|
"learning_rate": 5.095955703960746e-06,
|
|
"loss": 0.1625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17260053753852844,
|
|
"step": 300,
|
|
"valid_targets_mean": 1328.1,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 4.841269841269841,
|
|
"grad_norm": 1.034565590736608,
|
|
"learning_rate": 4.495920477777403e-06,
|
|
"loss": 0.1551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14985054731369019,
|
|
"step": 305,
|
|
"valid_targets_mean": 1234.3,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 4.920634920634921,
|
|
"grad_norm": 1.0765022207707986,
|
|
"learning_rate": 3.9289717386265255e-06,
|
|
"loss": 0.1592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1612224131822586,
|
|
"step": 310,
|
|
"valid_targets_mean": 1193.8,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.9478245585431166,
|
|
"learning_rate": 3.3963193836889907e-06,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1592485010623932,
|
|
"step": 315,
|
|
"valid_targets_mean": 1360.1,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 5.079365079365079,
|
|
"grad_norm": 1.0328174266538293,
|
|
"learning_rate": 2.89910011993338e-06,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14890140295028687,
|
|
"step": 320,
|
|
"valid_targets_mean": 1574.1,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 5.158730158730159,
|
|
"grad_norm": 0.9579410192765627,
|
|
"learning_rate": 2.4383750383260417e-06,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14151515066623688,
|
|
"step": 325,
|
|
"valid_targets_mean": 1227.1,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 5.238095238095238,
|
|
"grad_norm": 1.0498590988698493,
|
|
"learning_rate": 2.015127349409489e-06,
|
|
"loss": 0.1492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15444359183311462,
|
|
"step": 330,
|
|
"valid_targets_mean": 1198.4,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 5.317460317460317,
|
|
"grad_norm": 1.090019046173157,
|
|
"learning_rate": 1.6302602850815397e-06,
|
|
"loss": 0.1475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15183570981025696,
|
|
"step": 335,
|
|
"valid_targets_mean": 1241.6,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 5.396825396825397,
|
|
"grad_norm": 1.0209735271764744,
|
|
"learning_rate": 1.2845951710529513e-06,
|
|
"loss": 0.1564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16618052124977112,
|
|
"step": 340,
|
|
"valid_targets_mean": 1384.1,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 5.476190476190476,
|
|
"grad_norm": 1.167276848177137,
|
|
"learning_rate": 9.788696740969295e-07,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15419039130210876,
|
|
"step": 345,
|
|
"valid_targets_mean": 1056.2,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 5.555555555555555,
|
|
"grad_norm": 1.0131829348691888,
|
|
"learning_rate": 7.137362278311033e-07,
|
|
"loss": 0.1472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13364669680595398,
|
|
"step": 350,
|
|
"valid_targets_mean": 1154.5,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 5.634920634920634,
|
|
"grad_norm": 1.1216486988072,
|
|
"learning_rate": 4.89760640391268e-07,
|
|
"loss": 0.1492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1446513682603836,
|
|
"step": 355,
|
|
"valid_targets_mean": 1223.9,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 5.714285714285714,
|
|
"grad_norm": 1.0546772616513356,
|
|
"learning_rate": 3.074208869683282e-07,
|
|
"loss": 0.1573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14680185914039612,
|
|
"step": 360,
|
|
"valid_targets_mean": 1361.4,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 5.7936507936507935,
|
|
"grad_norm": 1.1759274152221304,
|
|
"learning_rate": 1.6710608978514509e-07,
|
|
"loss": 0.1529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16227003931999207,
|
|
"step": 365,
|
|
"valid_targets_mean": 1172.2,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 5.8730158730158735,
|
|
"grad_norm": 1.064627033500516,
|
|
"learning_rate": 6.91156876901089e-08,
|
|
"loss": 0.1514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18192672729492188,
|
|
"step": 370,
|
|
"valid_targets_mean": 1303.9,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 5.9523809523809526,
|
|
"grad_norm": 1.1282545011521061,
|
|
"learning_rate": 1.365879713954188e-08,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1329009234905243,
|
|
"step": 375,
|
|
"valid_targets_mean": 1124.7,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15850412845611572,
|
|
"step": 378,
|
|
"total_flos": 39306874650624.0,
|
|
"train_loss": 0.23945725027215545,
|
|
"train_runtime": 1368.0623,
|
|
"train_samples_per_second": 4.386,
|
|
"train_steps_per_second": 0.276,
|
|
"valid_targets_mean": 1612.7,
|
|
"valid_targets_min": 692
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 378,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 6,
|
|
"save_steps": 100,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 39306874650624.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|