675 lines
18 KiB
JSON
675 lines
18 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 287,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.12195121951219512,
|
|
"grad_norm": 11.268886282590001,
|
|
"learning_rate": 5.517241379310345e-06,
|
|
"loss": 0.7129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08619208633899689,
|
|
"step": 5,
|
|
"valid_targets_mean": 1809.8,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 0.24390243902439024,
|
|
"grad_norm": 6.618387158921114,
|
|
"learning_rate": 1.2413793103448277e-05,
|
|
"loss": 0.5949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15227359533309937,
|
|
"step": 10,
|
|
"valid_targets_mean": 1454.2,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 0.36585365853658536,
|
|
"grad_norm": 1.1697598603814097,
|
|
"learning_rate": 1.931034482758621e-05,
|
|
"loss": 0.3616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03705377131700516,
|
|
"step": 15,
|
|
"valid_targets_mean": 1068.8,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 0.4878048780487805,
|
|
"grad_norm": 1.0542992580910668,
|
|
"learning_rate": 2.620689655172414e-05,
|
|
"loss": 0.3145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05863872170448303,
|
|
"step": 20,
|
|
"valid_targets_mean": 1012.8,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 0.6097560975609756,
|
|
"grad_norm": 0.8906250398426856,
|
|
"learning_rate": 3.310344827586207e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1044623851776123,
|
|
"step": 25,
|
|
"valid_targets_mean": 3050.0,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 0.7317073170731707,
|
|
"grad_norm": 0.7873917512747328,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07892066985368729,
|
|
"step": 30,
|
|
"valid_targets_mean": 2248.5,
|
|
"valid_targets_min": 1487
|
|
},
|
|
{
|
|
"epoch": 0.8536585365853658,
|
|
"grad_norm": 0.7365758820610112,
|
|
"learning_rate": 3.99629433475729e-05,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05776943266391754,
|
|
"step": 35,
|
|
"valid_targets_mean": 2209.0,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 0.975609756097561,
|
|
"grad_norm": 0.9982728088095962,
|
|
"learning_rate": 3.985191070984053e-05,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08143679797649384,
|
|
"step": 40,
|
|
"valid_targets_mean": 1733.5,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 1.0975609756097562,
|
|
"grad_norm": 0.6049830734772758,
|
|
"learning_rate": 3.966731353658932e-05,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05846400931477547,
|
|
"step": 45,
|
|
"valid_targets_mean": 2682.8,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 1.2195121951219512,
|
|
"grad_norm": 0.6718448910202545,
|
|
"learning_rate": 3.940983588314811e-05,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05738037824630737,
|
|
"step": 50,
|
|
"valid_targets_mean": 2266.2,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 1.3414634146341464,
|
|
"grad_norm": 0.7687467547629783,
|
|
"learning_rate": 3.908043187550802e-05,
|
|
"loss": 0.2176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034274592995643616,
|
|
"step": 55,
|
|
"valid_targets_mean": 1044.0,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 1.4634146341463414,
|
|
"grad_norm": 0.4409997756684539,
|
|
"learning_rate": 3.868032217465097e-05,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06288576126098633,
|
|
"step": 60,
|
|
"valid_targets_mean": 7464.2,
|
|
"valid_targets_min": 4233
|
|
},
|
|
{
|
|
"epoch": 1.5853658536585367,
|
|
"grad_norm": 0.6669183491635846,
|
|
"learning_rate": 3.821098945318869e-05,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041596993803977966,
|
|
"step": 65,
|
|
"valid_targets_mean": 2462.2,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 1.7073170731707317,
|
|
"grad_norm": 0.833951005600001,
|
|
"learning_rate": 3.767417290107439e-05,
|
|
"loss": 0.2229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0828239768743515,
|
|
"step": 70,
|
|
"valid_targets_mean": 1676.0,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 1.8292682926829267,
|
|
"grad_norm": 0.3827765345675253,
|
|
"learning_rate": 3.7071861780746934e-05,
|
|
"loss": 0.1918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03231481462717056,
|
|
"step": 75,
|
|
"valid_targets_mean": 6333.8,
|
|
"valid_targets_min": 1529
|
|
},
|
|
{
|
|
"epoch": 1.951219512195122,
|
|
"grad_norm": 0.405420209692415,
|
|
"learning_rate": 3.640628805559022e-05,
|
|
"loss": 0.1834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021378157660365105,
|
|
"step": 80,
|
|
"valid_targets_mean": 2585.5,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 2.073170731707317,
|
|
"grad_norm": 0.6706457928261834,
|
|
"learning_rate": 3.567991811902403e-05,
|
|
"loss": 0.1949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029131846502423286,
|
|
"step": 85,
|
|
"valid_targets_mean": 1901.5,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 2.1951219512195124,
|
|
"grad_norm": 0.6506505238615833,
|
|
"learning_rate": 3.489544365487564e-05,
|
|
"loss": 0.1904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06596972048282623,
|
|
"step": 90,
|
|
"valid_targets_mean": 2361.2,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 2.317073170731707,
|
|
"grad_norm": 0.5666611455881608,
|
|
"learning_rate": 3.4055771662900637e-05,
|
|
"loss": 0.1738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032204728573560715,
|
|
"step": 95,
|
|
"valid_targets_mean": 1985.5,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 2.4390243902439024,
|
|
"grad_norm": 0.5528709212555227,
|
|
"learning_rate": 3.316401368641496e-05,
|
|
"loss": 0.1743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03457804396748543,
|
|
"step": 100,
|
|
"valid_targets_mean": 3545.0,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 2.5609756097560976,
|
|
"grad_norm": 0.8867699611106878,
|
|
"learning_rate": 3.222347428195699e-05,
|
|
"loss": 0.182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06031285971403122,
|
|
"step": 105,
|
|
"valid_targets_mean": 1640.8,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 2.682926829268293,
|
|
"grad_norm": 0.5690766882507081,
|
|
"learning_rate": 3.1237638773707214e-05,
|
|
"loss": 0.1587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04014682024717331,
|
|
"step": 110,
|
|
"valid_targets_mean": 2029.8,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 2.8048780487804876,
|
|
"grad_norm": 0.7008728096682685,
|
|
"learning_rate": 3.0210160338043583e-05,
|
|
"loss": 0.1708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07026761770248413,
|
|
"step": 115,
|
|
"valid_targets_mean": 2580.8,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 2.926829268292683,
|
|
"grad_norm": 0.7716450116227951,
|
|
"learning_rate": 2.9144846466092773e-05,
|
|
"loss": 0.1624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06721051037311554,
|
|
"step": 120,
|
|
"valid_targets_mean": 2570.2,
|
|
"valid_targets_min": 1543
|
|
},
|
|
{
|
|
"epoch": 3.048780487804878,
|
|
"grad_norm": 1.5503052769053867,
|
|
"learning_rate": 2.804564485444265e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0261751189827919,
|
|
"step": 125,
|
|
"valid_targets_mean": 1452.5,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 3.1707317073170733,
|
|
"grad_norm": 0.6734417093831508,
|
|
"learning_rate": 2.691662877630023e-05,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028553606942296028,
|
|
"step": 130,
|
|
"valid_targets_mean": 2922.2,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 3.292682926829268,
|
|
"grad_norm": 1.456490794322615,
|
|
"learning_rate": 2.5761981987304757e-05,
|
|
"loss": 0.1455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03396756574511528,
|
|
"step": 135,
|
|
"valid_targets_mean": 1340.2,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 3.4146341463414633,
|
|
"grad_norm": 0.9411604050978699,
|
|
"learning_rate": 2.4585983221929803e-05,
|
|
"loss": 0.1411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037783801555633545,
|
|
"step": 140,
|
|
"valid_targets_mean": 2368.0,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 3.5365853658536586,
|
|
"grad_norm": 0.9034332983731574,
|
|
"learning_rate": 2.3392990337925696e-05,
|
|
"loss": 0.1504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04916565865278244,
|
|
"step": 145,
|
|
"valid_targets_mean": 1793.0,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 3.658536585365854,
|
|
"grad_norm": 0.9084489941265945,
|
|
"learning_rate": 2.2187424167557496e-05,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04747513309121132,
|
|
"step": 150,
|
|
"valid_targets_mean": 2595.5,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 3.7804878048780486,
|
|
"grad_norm": 0.8065019341177397,
|
|
"learning_rate": 2.0973752135480505e-05,
|
|
"loss": 0.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030843544751405716,
|
|
"step": 155,
|
|
"valid_targets_mean": 1864.2,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 3.902439024390244,
|
|
"grad_norm": 0.5952164537741085,
|
|
"learning_rate": 1.9756471703960053e-05,
|
|
"loss": 0.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020152652636170387,
|
|
"step": 160,
|
|
"valid_targets_mean": 1157.0,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 4.024390243902439,
|
|
"grad_norm": 0.5913764265658609,
|
|
"learning_rate": 1.8540093706781848e-05,
|
|
"loss": 0.1105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.023658908903598785,
|
|
"step": 165,
|
|
"valid_targets_mean": 1657.0,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 4.146341463414634,
|
|
"grad_norm": 0.8822354625847237,
|
|
"learning_rate": 1.7329125633612044e-05,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03471684455871582,
|
|
"step": 170,
|
|
"valid_targets_mean": 1434.2,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 4.2682926829268295,
|
|
"grad_norm": 1.1958146779982144,
|
|
"learning_rate": 1.6128054926749403e-05,
|
|
"loss": 0.1245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03604736179113388,
|
|
"step": 175,
|
|
"valid_targets_mean": 2452.0,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 4.390243902439025,
|
|
"grad_norm": 0.660365938604491,
|
|
"learning_rate": 1.4941332352166385e-05,
|
|
"loss": 0.1253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022628050297498703,
|
|
"step": 180,
|
|
"valid_targets_mean": 2193.5,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 4.512195121951219,
|
|
"grad_norm": 0.6469748368984156,
|
|
"learning_rate": 1.3773355506460369e-05,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04104577377438545,
|
|
"step": 185,
|
|
"valid_targets_mean": 2404.2,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 4.634146341463414,
|
|
"grad_norm": 0.5688289937825557,
|
|
"learning_rate": 1.2628452520832766e-05,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026066523045301437,
|
|
"step": 190,
|
|
"valid_targets_mean": 1617.0,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 4.7560975609756095,
|
|
"grad_norm": 0.8846513600102238,
|
|
"learning_rate": 1.1510866022483702e-05,
|
|
"loss": 0.112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03839657083153725,
|
|
"step": 195,
|
|
"valid_targets_mean": 2619.5,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 4.878048780487805,
|
|
"grad_norm": 0.8205515547497473,
|
|
"learning_rate": 1.0424737412855825e-05,
|
|
"loss": 0.1243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05243876576423645,
|
|
"step": 200,
|
|
"valid_targets_mean": 1985.8,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.8961051415857917,
|
|
"learning_rate": 9.374091520986936e-06,
|
|
"loss": 0.144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026954276487231255,
|
|
"step": 205,
|
|
"valid_targets_mean": 2568.2,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 5.121951219512195,
|
|
"grad_norm": 0.6162209862161441,
|
|
"learning_rate": 8.362821688840947e-06,
|
|
"loss": 0.1142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024338502436876297,
|
|
"step": 210,
|
|
"valid_targets_mean": 1392.8,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 5.2439024390243905,
|
|
"grad_norm": 0.8925372695642547,
|
|
"learning_rate": 7.394675343885827e-06,
|
|
"loss": 0.1041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03295106813311577,
|
|
"step": 215,
|
|
"valid_targets_mean": 3177.8,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 5.365853658536586,
|
|
"grad_norm": 0.6073755624637226,
|
|
"learning_rate": 6.473240112381944e-06,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02173219993710518,
|
|
"step": 220,
|
|
"valid_targets_mean": 3121.0,
|
|
"valid_targets_min": 1748
|
|
},
|
|
{
|
|
"epoch": 5.487804878048781,
|
|
"grad_norm": 1.040705900656647,
|
|
"learning_rate": 5.601930524840087e-06,
|
|
"loss": 0.1248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025791862979531288,
|
|
"step": 225,
|
|
"valid_targets_mean": 2800.5,
|
|
"valid_targets_min": 929
|
|
},
|
|
{
|
|
"epoch": 5.609756097560975,
|
|
"grad_norm": 0.8950373265293078,
|
|
"learning_rate": 4.7839753629144395e-06,
|
|
"loss": 0.1106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.014685861766338348,
|
|
"step": 230,
|
|
"valid_targets_mean": 945.0,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 5.7317073170731705,
|
|
"grad_norm": 0.7649064738807579,
|
|
"learning_rate": 4.022405694618659e-06,
|
|
"loss": 0.108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02761884592473507,
|
|
"step": 235,
|
|
"valid_targets_mean": 1485.0,
|
|
"valid_targets_min": 331
|
|
},
|
|
{
|
|
"epoch": 5.853658536585366,
|
|
"grad_norm": 0.8719679552921337,
|
|
"learning_rate": 3.320043642202444e-06,
|
|
"loss": 0.1033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03598159924149513,
|
|
"step": 240,
|
|
"valid_targets_mean": 3070.0,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 5.975609756097561,
|
|
"grad_norm": 0.6242438144503197,
|
|
"learning_rate": 2.679491924311226e-06,
|
|
"loss": 0.1044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025556661188602448,
|
|
"step": 245,
|
|
"valid_targets_mean": 2245.0,
|
|
"valid_targets_min": 1069
|
|
},
|
|
{
|
|
"epoch": 6.097560975609756,
|
|
"grad_norm": 0.8732700455256523,
|
|
"learning_rate": 2.103124211182164e-06,
|
|
"loss": 0.1087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03347954526543617,
|
|
"step": 250,
|
|
"valid_targets_mean": 1517.0,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 6.219512195121951,
|
|
"grad_norm": 1.0111215639307505,
|
|
"learning_rate": 1.5930763286168138e-06,
|
|
"loss": 0.1049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026082392781972885,
|
|
"step": 255,
|
|
"valid_targets_mean": 1642.8,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 6.341463414634147,
|
|
"grad_norm": 0.7577281317860388,
|
|
"learning_rate": 1.1512383433257112e-06,
|
|
"loss": 0.0969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01752658188343048,
|
|
"step": 260,
|
|
"valid_targets_mean": 2109.5,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 6.463414634146342,
|
|
"grad_norm": 0.6730869279262718,
|
|
"learning_rate": 7.792475589738679e-07,
|
|
"loss": 0.1071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020278798416256905,
|
|
"step": 265,
|
|
"valid_targets_mean": 1482.5,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 6.585365853658536,
|
|
"grad_norm": 0.7854617549186186,
|
|
"learning_rate": 4.784824488814588e-07,
|
|
"loss": 0.1053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022827567532658577,
|
|
"step": 270,
|
|
"valid_targets_mean": 2941.8,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 6.7073170731707314,
|
|
"grad_norm": 0.5254164831244615,
|
|
"learning_rate": 2.5005754786317173e-07,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022403722628951073,
|
|
"step": 275,
|
|
"valid_targets_mean": 2718.0,
|
|
"valid_targets_min": 1325
|
|
},
|
|
{
|
|
"epoch": 6.829268292682927,
|
|
"grad_norm": 0.6678257554250038,
|
|
"learning_rate": 9.481932213528444e-08,
|
|
"loss": 0.0987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027537822723388672,
|
|
"step": 280,
|
|
"valid_targets_mean": 3525.0,
|
|
"valid_targets_min": 1519
|
|
},
|
|
{
|
|
"epoch": 6.951219512195122,
|
|
"grad_norm": 0.8096613625255807,
|
|
"learning_rate": 1.334303259521219e-08,
|
|
"loss": 0.1098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03351534903049469,
|
|
"step": 285,
|
|
"valid_targets_mean": 1808.5,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.016517292708158493,
|
|
"step": 287,
|
|
"total_flos": 9.154523001624986e+16,
|
|
"train_loss": 0.17878121568558522,
|
|
"train_runtime": 15474.591,
|
|
"train_samples_per_second": 0.297,
|
|
"train_steps_per_second": 0.019,
|
|
"valid_targets_mean": 880.2,
|
|
"valid_targets_min": 562
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 287,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 9.154523001624986e+16,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|