Model: Abner0803/Qwen3-1.7B-msmarco-text-100k-with_pseudo_queries Source: Original Platform
5035 lines
152 KiB
JSON
5035 lines
152 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.727387276450121,
|
|
"eval_steps": 500,
|
|
"global_step": 25000,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.011454917739371984,
|
|
"grad_norm": 53.10772705078125,
|
|
"learning_rate": 1.1228230980751604e-06,
|
|
"loss": 5.3499,
|
|
"memory/device_mem_reserved(gib)": 49.37,
|
|
"memory/max_mem_active(gib)": 44.85,
|
|
"memory/max_mem_allocated(gib)": 44.85,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.022909835478743968,
|
|
"grad_norm": 33.95736312866211,
|
|
"learning_rate": 2.268560953253896e-06,
|
|
"loss": 5.0386,
|
|
"memory/device_mem_reserved(gib)": 49.37,
|
|
"memory/max_mem_active(gib)": 44.85,
|
|
"memory/max_mem_allocated(gib)": 44.85,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.034364753218115954,
|
|
"grad_norm": 16.349882125854492,
|
|
"learning_rate": 3.414298808432631e-06,
|
|
"loss": 3.9819,
|
|
"memory/device_mem_reserved(gib)": 49.37,
|
|
"memory/max_mem_active(gib)": 44.85,
|
|
"memory/max_mem_allocated(gib)": 44.85,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.045819670957487936,
|
|
"grad_norm": 15.363502502441406,
|
|
"learning_rate": 4.5600366636113664e-06,
|
|
"loss": 3.164,
|
|
"memory/device_mem_reserved(gib)": 49.37,
|
|
"memory/max_mem_active(gib)": 44.85,
|
|
"memory/max_mem_allocated(gib)": 44.85,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.05727458869685992,
|
|
"grad_norm": 17.262718200683594,
|
|
"learning_rate": 5.705774518790101e-06,
|
|
"loss": 2.8121,
|
|
"memory/device_mem_reserved(gib)": 49.37,
|
|
"memory/max_mem_active(gib)": 44.85,
|
|
"memory/max_mem_allocated(gib)": 44.85,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.06872950643623191,
|
|
"grad_norm": 14.994147300720215,
|
|
"learning_rate": 6.8515123739688366e-06,
|
|
"loss": 2.4217,
|
|
"memory/device_mem_reserved(gib)": 49.37,
|
|
"memory/max_mem_active(gib)": 44.85,
|
|
"memory/max_mem_allocated(gib)": 44.85,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.08018442417560388,
|
|
"grad_norm": 11.715180397033691,
|
|
"learning_rate": 7.997250229147571e-06,
|
|
"loss": 2.1894,
|
|
"memory/device_mem_reserved(gib)": 49.37,
|
|
"memory/max_mem_active(gib)": 44.85,
|
|
"memory/max_mem_allocated(gib)": 44.85,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.09163934191497587,
|
|
"grad_norm": 10.08484172821045,
|
|
"learning_rate": 9.142988084326307e-06,
|
|
"loss": 2.1338,
|
|
"memory/device_mem_reserved(gib)": 49.37,
|
|
"memory/max_mem_active(gib)": 44.85,
|
|
"memory/max_mem_allocated(gib)": 44.85,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.10309425965434786,
|
|
"grad_norm": 9.557114601135254,
|
|
"learning_rate": 1.0288725939505042e-05,
|
|
"loss": 2.0711,
|
|
"memory/device_mem_reserved(gib)": 51.36,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.11454917739371984,
|
|
"grad_norm": 9.078670501708984,
|
|
"learning_rate": 1.1434463794683776e-05,
|
|
"loss": 2.0381,
|
|
"memory/device_mem_reserved(gib)": 51.36,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.12600409513309183,
|
|
"grad_norm": 9.677817344665527,
|
|
"learning_rate": 1.2580201649862511e-05,
|
|
"loss": 2.0244,
|
|
"memory/device_mem_reserved(gib)": 51.36,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.13745901287246381,
|
|
"grad_norm": 10.270977973937988,
|
|
"learning_rate": 1.3725939505041247e-05,
|
|
"loss": 1.9982,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.1489139306118358,
|
|
"grad_norm": 8.053842544555664,
|
|
"learning_rate": 1.4871677360219982e-05,
|
|
"loss": 2.0071,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.16036884835120777,
|
|
"grad_norm": 8.858375549316406,
|
|
"learning_rate": 1.6017415215398718e-05,
|
|
"loss": 1.9546,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.17182376609057975,
|
|
"grad_norm": 9.538141250610352,
|
|
"learning_rate": 1.7163153070577455e-05,
|
|
"loss": 1.9464,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.18327868382995174,
|
|
"grad_norm": 7.541695594787598,
|
|
"learning_rate": 1.830889092575619e-05,
|
|
"loss": 1.9085,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.19473360156932373,
|
|
"grad_norm": 7.665754318237305,
|
|
"learning_rate": 1.9454628780934923e-05,
|
|
"loss": 1.9153,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.20618851930869572,
|
|
"grad_norm": 9.04691219329834,
|
|
"learning_rate": 2.0600366636113656e-05,
|
|
"loss": 1.8734,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.2176434370480677,
|
|
"grad_norm": 7.098514080047607,
|
|
"learning_rate": 2.1746104491292394e-05,
|
|
"loss": 1.8809,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.22909835478743967,
|
|
"grad_norm": 7.5708513259887695,
|
|
"learning_rate": 2.2891842346471127e-05,
|
|
"loss": 1.8459,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.24055327252681166,
|
|
"grad_norm": 8.422965049743652,
|
|
"learning_rate": 2.4037580201649865e-05,
|
|
"loss": 1.8414,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.25200819026618365,
|
|
"grad_norm": 7.765232563018799,
|
|
"learning_rate": 2.51833180568286e-05,
|
|
"loss": 1.857,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.2634631080055556,
|
|
"grad_norm": 7.53985595703125,
|
|
"learning_rate": 2.6329055912007332e-05,
|
|
"loss": 1.8113,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.27491802574492763,
|
|
"grad_norm": 7.5806450843811035,
|
|
"learning_rate": 2.747479376718607e-05,
|
|
"loss": 1.793,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.2863729434842996,
|
|
"grad_norm": 6.706181526184082,
|
|
"learning_rate": 2.8620531622364803e-05,
|
|
"loss": 1.8109,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.2978278612236716,
|
|
"grad_norm": 7.132224082946777,
|
|
"learning_rate": 2.976626947754354e-05,
|
|
"loss": 1.7923,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.30928277896304357,
|
|
"grad_norm": 7.725433826446533,
|
|
"learning_rate": 3.091200733272228e-05,
|
|
"loss": 1.7504,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.32073769670241553,
|
|
"grad_norm": 7.6306843757629395,
|
|
"learning_rate": 3.205774518790101e-05,
|
|
"loss": 1.7802,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.33219261444178755,
|
|
"grad_norm": 7.927916049957275,
|
|
"learning_rate": 3.3203483043079745e-05,
|
|
"loss": 1.7454,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.3436475321811595,
|
|
"grad_norm": 7.468013286590576,
|
|
"learning_rate": 3.434922089825848e-05,
|
|
"loss": 1.716,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.3551024499205315,
|
|
"grad_norm": 6.887967586517334,
|
|
"learning_rate": 3.549495875343721e-05,
|
|
"loss": 1.7054,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.3665573676599035,
|
|
"grad_norm": 7.042320251464844,
|
|
"learning_rate": 3.6640696608615946e-05,
|
|
"loss": 1.716,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.37801228539927545,
|
|
"grad_norm": 7.46671199798584,
|
|
"learning_rate": 3.778643446379469e-05,
|
|
"loss": 1.7074,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.38946720313864747,
|
|
"grad_norm": 4.348405838012695,
|
|
"learning_rate": 3.893217231897342e-05,
|
|
"loss": 1.6607,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.40092212087801943,
|
|
"grad_norm": 7.3193511962890625,
|
|
"learning_rate": 4.0077910174152155e-05,
|
|
"loss": 1.6516,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.41237703861739144,
|
|
"grad_norm": 7.363260746002197,
|
|
"learning_rate": 4.122364802933089e-05,
|
|
"loss": 1.6137,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.4238319563567634,
|
|
"grad_norm": 7.189822673797607,
|
|
"learning_rate": 4.236938588450963e-05,
|
|
"loss": 1.5882,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.4352868740961354,
|
|
"grad_norm": 7.271198272705078,
|
|
"learning_rate": 4.351512373968836e-05,
|
|
"loss": 1.5759,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.4467417918355074,
|
|
"grad_norm": 10.216059684753418,
|
|
"learning_rate": 4.4660861594867096e-05,
|
|
"loss": 1.5663,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.45819670957487935,
|
|
"grad_norm": 6.804873943328857,
|
|
"learning_rate": 4.580659945004584e-05,
|
|
"loss": 1.5447,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.46965162731425136,
|
|
"grad_norm": 7.637989044189453,
|
|
"learning_rate": 4.695233730522457e-05,
|
|
"loss": 1.553,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.4811065450536233,
|
|
"grad_norm": 6.641468048095703,
|
|
"learning_rate": 4.80980751604033e-05,
|
|
"loss": 1.5486,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.49256146279299534,
|
|
"grad_norm": 7.134258270263672,
|
|
"learning_rate": 4.924381301558204e-05,
|
|
"loss": 1.5126,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.5040163805323673,
|
|
"grad_norm": 6.905734062194824,
|
|
"learning_rate": 5.038955087076077e-05,
|
|
"loss": 1.4918,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.5154712982717393,
|
|
"grad_norm": 7.143308162689209,
|
|
"learning_rate": 5.153528872593951e-05,
|
|
"loss": 1.4778,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.5269262160111112,
|
|
"grad_norm": 6.968287467956543,
|
|
"learning_rate": 5.268102658111824e-05,
|
|
"loss": 1.431,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.5383811337504832,
|
|
"grad_norm": 7.385350704193115,
|
|
"learning_rate": 5.3826764436296974e-05,
|
|
"loss": 1.4638,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.5498360514898553,
|
|
"grad_norm": 6.7367095947265625,
|
|
"learning_rate": 5.4972502291475714e-05,
|
|
"loss": 1.4236,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.5612909692292273,
|
|
"grad_norm": 7.013253211975098,
|
|
"learning_rate": 5.611824014665444e-05,
|
|
"loss": 1.3933,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.5727458869685992,
|
|
"grad_norm": 7.49541711807251,
|
|
"learning_rate": 5.726397800183319e-05,
|
|
"loss": 1.3847,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.5842008047079712,
|
|
"grad_norm": 7.078319549560547,
|
|
"learning_rate": 5.8409715857011915e-05,
|
|
"loss": 1.3825,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.5956557224473432,
|
|
"grad_norm": 7.429485321044922,
|
|
"learning_rate": 5.9555453712190656e-05,
|
|
"loss": 1.3629,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.6071106401867151,
|
|
"grad_norm": 7.05700159072876,
|
|
"learning_rate": 6.070119156736939e-05,
|
|
"loss": 1.3372,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.6185655579260871,
|
|
"grad_norm": 7.29513692855835,
|
|
"learning_rate": 6.184692942254812e-05,
|
|
"loss": 1.3185,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.6300204756654592,
|
|
"grad_norm": 6.8477911949157715,
|
|
"learning_rate": 6.299266727772686e-05,
|
|
"loss": 1.3066,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.6414753934048311,
|
|
"grad_norm": 7.389026641845703,
|
|
"learning_rate": 6.41384051329056e-05,
|
|
"loss": 1.2829,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.6529303111442031,
|
|
"grad_norm": 6.852631568908691,
|
|
"learning_rate": 6.528414298808432e-05,
|
|
"loss": 1.274,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.6643852288835751,
|
|
"grad_norm": 7.158923625946045,
|
|
"learning_rate": 6.642988084326306e-05,
|
|
"loss": 1.2486,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.6758401466229471,
|
|
"grad_norm": 7.069329261779785,
|
|
"learning_rate": 6.75756186984418e-05,
|
|
"loss": 1.2517,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.687295064362319,
|
|
"grad_norm": 6.942631721496582,
|
|
"learning_rate": 6.872135655362053e-05,
|
|
"loss": 1.1829,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.698749982101691,
|
|
"grad_norm": 7.831090450286865,
|
|
"learning_rate": 6.986709440879927e-05,
|
|
"loss": 1.2037,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.710204899841063,
|
|
"grad_norm": 6.641531467437744,
|
|
"learning_rate": 7.101283226397801e-05,
|
|
"loss": 1.1565,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.721659817580435,
|
|
"grad_norm": 7.846933841705322,
|
|
"learning_rate": 7.215857011915674e-05,
|
|
"loss": 1.1647,
|
|
"memory/device_mem_reserved(gib)": 51.42,
|
|
"memory/max_mem_active(gib)": 46.59,
|
|
"memory/max_mem_allocated(gib)": 46.59,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.733114735319807,
|
|
"grad_norm": 6.905858039855957,
|
|
"learning_rate": 7.330430797433548e-05,
|
|
"loss": 1.1853,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.744569653059179,
|
|
"grad_norm": 7.997142314910889,
|
|
"learning_rate": 7.445004582951421e-05,
|
|
"loss": 1.1541,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.7560245707985509,
|
|
"grad_norm": 6.95665979385376,
|
|
"learning_rate": 7.559578368469294e-05,
|
|
"loss": 1.1223,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.7674794885379229,
|
|
"grad_norm": 7.185131549835205,
|
|
"learning_rate": 7.674152153987169e-05,
|
|
"loss": 1.1113,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.7789344062772949,
|
|
"grad_norm": 6.778895854949951,
|
|
"learning_rate": 7.788725939505041e-05,
|
|
"loss": 1.0765,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.790389324016667,
|
|
"grad_norm": 7.30415153503418,
|
|
"learning_rate": 7.903299725022914e-05,
|
|
"loss": 1.0601,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.8018442417560389,
|
|
"grad_norm": 6.911710739135742,
|
|
"learning_rate": 8.017873510540789e-05,
|
|
"loss": 1.0406,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.8132991594954109,
|
|
"grad_norm": 7.257194995880127,
|
|
"learning_rate": 8.132447296058661e-05,
|
|
"loss": 1.0284,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.8247540772347829,
|
|
"grad_norm": 8.09947395324707,
|
|
"learning_rate": 8.247021081576536e-05,
|
|
"loss": 1.0178,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.8362089949741548,
|
|
"grad_norm": 7.630951404571533,
|
|
"learning_rate": 8.361594867094409e-05,
|
|
"loss": 1.0031,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.8476639127135268,
|
|
"grad_norm": 7.508652210235596,
|
|
"learning_rate": 8.476168652612283e-05,
|
|
"loss": 0.9628,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.8591188304528988,
|
|
"grad_norm": 8.247767448425293,
|
|
"learning_rate": 8.590742438130156e-05,
|
|
"loss": 0.9669,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.8705737481922708,
|
|
"grad_norm": 7.914950370788574,
|
|
"learning_rate": 8.705316223648031e-05,
|
|
"loss": 0.9656,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.8820286659316428,
|
|
"grad_norm": 7.725244045257568,
|
|
"learning_rate": 8.819890009165903e-05,
|
|
"loss": 0.9541,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.8934835836710148,
|
|
"grad_norm": 6.968287467956543,
|
|
"learning_rate": 8.934463794683778e-05,
|
|
"loss": 0.9421,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.9049385014103868,
|
|
"grad_norm": 6.712941646575928,
|
|
"learning_rate": 9.049037580201651e-05,
|
|
"loss": 0.921,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.9163934191497587,
|
|
"grad_norm": 6.738905429840088,
|
|
"learning_rate": 9.163611365719523e-05,
|
|
"loss": 0.9133,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.9278483368891307,
|
|
"grad_norm": 8.376337051391602,
|
|
"learning_rate": 9.278185151237398e-05,
|
|
"loss": 0.9016,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.9393032546285027,
|
|
"grad_norm": 7.274137020111084,
|
|
"learning_rate": 9.392758936755271e-05,
|
|
"loss": 0.8829,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.9507581723678746,
|
|
"grad_norm": 7.919043064117432,
|
|
"learning_rate": 9.507332722273144e-05,
|
|
"loss": 0.8555,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.9622130901072466,
|
|
"grad_norm": 6.632596015930176,
|
|
"learning_rate": 9.621906507791018e-05,
|
|
"loss": 0.8945,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.9736680078466187,
|
|
"grad_norm": 7.122948169708252,
|
|
"learning_rate": 9.736480293308891e-05,
|
|
"loss": 0.8447,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.9851229255859907,
|
|
"grad_norm": 6.747700214385986,
|
|
"learning_rate": 9.851054078826765e-05,
|
|
"loss": 0.8283,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.9965778433253626,
|
|
"grad_norm": 6.4440765380859375,
|
|
"learning_rate": 9.965627864344639e-05,
|
|
"loss": 0.8052,
|
|
"memory/device_mem_reserved(gib)": 53.21,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 1.0080184424175604,
|
|
"grad_norm": 6.0668182373046875,
|
|
"learning_rate": 9.99998041506907e-05,
|
|
"loss": 0.7038,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 1.0194733601569324,
|
|
"grad_norm": 8.027034759521484,
|
|
"learning_rate": 9.999884489246108e-05,
|
|
"loss": 0.6596,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 1.0309282778963043,
|
|
"grad_norm": 6.251341819763184,
|
|
"learning_rate": 9.999708626830618e-05,
|
|
"loss": 0.6702,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 1.0423831956356764,
|
|
"grad_norm": 6.4562506675720215,
|
|
"learning_rate": 9.999452830634232e-05,
|
|
"loss": 0.6421,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 1.0538381133750483,
|
|
"grad_norm": 6.475341796875,
|
|
"learning_rate": 9.999117104746543e-05,
|
|
"loss": 0.6355,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 1.0652930311144204,
|
|
"grad_norm": 6.397785663604736,
|
|
"learning_rate": 9.998701454535029e-05,
|
|
"loss": 0.638,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 1.0767479488537923,
|
|
"grad_norm": 6.843462944030762,
|
|
"learning_rate": 9.998205886644977e-05,
|
|
"loss": 0.6332,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 1.0882028665931642,
|
|
"grad_norm": 6.432698726654053,
|
|
"learning_rate": 9.997630408999371e-05,
|
|
"loss": 0.6187,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 1.0996577843325364,
|
|
"grad_norm": 7.654291152954102,
|
|
"learning_rate": 9.996975030798767e-05,
|
|
"loss": 0.6118,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 1.1111127020719083,
|
|
"grad_norm": 5.9475812911987305,
|
|
"learning_rate": 9.996239762521151e-05,
|
|
"loss": 0.6068,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 1.1225676198112802,
|
|
"grad_norm": 7.744262218475342,
|
|
"learning_rate": 9.995424615921757e-05,
|
|
"loss": 0.6021,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 1.1340225375506523,
|
|
"grad_norm": 6.4447197914123535,
|
|
"learning_rate": 9.9945296040329e-05,
|
|
"loss": 0.6119,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 1.1454774552900242,
|
|
"grad_norm": 6.5645432472229,
|
|
"learning_rate": 9.993554741163749e-05,
|
|
"loss": 0.5836,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 1.156932373029396,
|
|
"grad_norm": 6.169116020202637,
|
|
"learning_rate": 9.992500042900104e-05,
|
|
"loss": 0.585,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 1.1683872907687682,
|
|
"grad_norm": 8.242532730102539,
|
|
"learning_rate": 9.991365526104154e-05,
|
|
"loss": 0.5657,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 1.1798422085081401,
|
|
"grad_norm": 7.146617412567139,
|
|
"learning_rate": 9.990151208914202e-05,
|
|
"loss": 0.5808,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 1.191297126247512,
|
|
"grad_norm": 6.222508430480957,
|
|
"learning_rate": 9.988857110744367e-05,
|
|
"loss": 0.554,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 1.2027520439868842,
|
|
"grad_norm": 6.2183146476745605,
|
|
"learning_rate": 9.987483252284291e-05,
|
|
"loss": 0.549,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 1.214206961726256,
|
|
"grad_norm": 6.201925754547119,
|
|
"learning_rate": 9.986029655498792e-05,
|
|
"loss": 0.5595,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 1.225661879465628,
|
|
"grad_norm": 6.70211124420166,
|
|
"learning_rate": 9.984496343627523e-05,
|
|
"loss": 0.557,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 1.2371167972050001,
|
|
"grad_norm": 6.915555477142334,
|
|
"learning_rate": 9.982883341184593e-05,
|
|
"loss": 0.5267,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 1.248571714944372,
|
|
"grad_norm": 6.054184913635254,
|
|
"learning_rate": 9.981190673958185e-05,
|
|
"loss": 0.5359,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 1.260026632683744,
|
|
"grad_norm": 6.70853328704834,
|
|
"learning_rate": 9.979418369010131e-05,
|
|
"loss": 0.5326,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 1.271481550423116,
|
|
"grad_norm": 6.052192211151123,
|
|
"learning_rate": 9.977566454675492e-05,
|
|
"loss": 0.5156,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 1.282936468162488,
|
|
"grad_norm": 6.540433406829834,
|
|
"learning_rate": 9.975634960562094e-05,
|
|
"loss": 0.5274,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 1.2943913859018599,
|
|
"grad_norm": 5.683777332305908,
|
|
"learning_rate": 9.973623917550065e-05,
|
|
"loss": 0.5169,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 1.305846303641232,
|
|
"grad_norm": 5.470891952514648,
|
|
"learning_rate": 9.97153335779133e-05,
|
|
"loss": 0.5018,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 1.317301221380604,
|
|
"grad_norm": 4.297957897186279,
|
|
"learning_rate": 9.969363314709107e-05,
|
|
"loss": 0.4915,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 1.3287561391199758,
|
|
"grad_norm": 6.072817325592041,
|
|
"learning_rate": 9.967113822997367e-05,
|
|
"loss": 0.4886,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 1.340211056859348,
|
|
"grad_norm": 5.685266017913818,
|
|
"learning_rate": 9.964784918620282e-05,
|
|
"loss": 0.4925,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 1.3516659745987198,
|
|
"grad_norm": 7.324371337890625,
|
|
"learning_rate": 9.962376638811648e-05,
|
|
"loss": 0.4557,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 1.363120892338092,
|
|
"grad_norm": 5.497219085693359,
|
|
"learning_rate": 9.959889022074291e-05,
|
|
"loss": 0.4731,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 1.3745758100774639,
|
|
"grad_norm": 5.637268543243408,
|
|
"learning_rate": 9.95732210817945e-05,
|
|
"loss": 0.4653,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 1.3860307278168358,
|
|
"grad_norm": 5.0990705490112305,
|
|
"learning_rate": 9.954675938166145e-05,
|
|
"loss": 0.4563,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 1.397485645556208,
|
|
"grad_norm": 5.403497695922852,
|
|
"learning_rate": 9.951950554340515e-05,
|
|
"loss": 0.4427,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 1.4089405632955798,
|
|
"grad_norm": 5.238762378692627,
|
|
"learning_rate": 9.949146000275145e-05,
|
|
"loss": 0.4517,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 1.420395481034952,
|
|
"grad_norm": 3.6025900840759277,
|
|
"learning_rate": 9.946262320808371e-05,
|
|
"loss": 0.4287,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 1.4318503987743239,
|
|
"grad_norm": 4.929942607879639,
|
|
"learning_rate": 9.94329956204356e-05,
|
|
"loss": 0.4268,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 1.4433053165136958,
|
|
"grad_norm": 6.123436450958252,
|
|
"learning_rate": 9.940257771348375e-05,
|
|
"loss": 0.4254,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 1.4547602342530679,
|
|
"grad_norm": 6.038297653198242,
|
|
"learning_rate": 9.937136997354015e-05,
|
|
"loss": 0.4089,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 1.4662151519924398,
|
|
"grad_norm": 5.310572147369385,
|
|
"learning_rate": 9.93393728995444e-05,
|
|
"loss": 0.4142,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 1.4776700697318117,
|
|
"grad_norm": 5.765950679779053,
|
|
"learning_rate": 9.930658700305576e-05,
|
|
"loss": 0.4095,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 1.4891249874711838,
|
|
"grad_norm": 5.236095905303955,
|
|
"learning_rate": 9.927301280824489e-05,
|
|
"loss": 0.4068,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 1.5005799052105557,
|
|
"grad_norm": 5.353938102722168,
|
|
"learning_rate": 9.923865085188552e-05,
|
|
"loss": 0.4121,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6550
|
|
},
|
|
{
|
|
"epoch": 1.5120348229499276,
|
|
"grad_norm": 6.9634881019592285,
|
|
"learning_rate": 9.920350168334591e-05,
|
|
"loss": 0.393,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 1.5234897406892998,
|
|
"grad_norm": 4.650847911834717,
|
|
"learning_rate": 9.916756586457999e-05,
|
|
"loss": 0.385,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6650
|
|
},
|
|
{
|
|
"epoch": 1.5349446584286717,
|
|
"grad_norm": 5.3702311515808105,
|
|
"learning_rate": 9.91308439701184e-05,
|
|
"loss": 0.3965,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 1.5463995761680436,
|
|
"grad_norm": 5.833876132965088,
|
|
"learning_rate": 9.909333658705933e-05,
|
|
"loss": 0.3859,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6750
|
|
},
|
|
{
|
|
"epoch": 1.5578544939074157,
|
|
"grad_norm": 4.853622913360596,
|
|
"learning_rate": 9.905504431505912e-05,
|
|
"loss": 0.3788,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 1.5693094116467876,
|
|
"grad_norm": 4.673356056213379,
|
|
"learning_rate": 9.901596776632266e-05,
|
|
"loss": 0.3726,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6850
|
|
},
|
|
{
|
|
"epoch": 1.5807643293861595,
|
|
"grad_norm": 4.708242416381836,
|
|
"learning_rate": 9.897610756559361e-05,
|
|
"loss": 0.3624,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 1.5922192471255316,
|
|
"grad_norm": 5.4483962059021,
|
|
"learning_rate": 9.893546435014442e-05,
|
|
"loss": 0.371,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 6950
|
|
},
|
|
{
|
|
"epoch": 1.6036741648649036,
|
|
"grad_norm": 5.3623223304748535,
|
|
"learning_rate": 9.889403876976614e-05,
|
|
"loss": 0.3574,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 1.6151290826042755,
|
|
"grad_norm": 4.880136013031006,
|
|
"learning_rate": 9.8851831486758e-05,
|
|
"loss": 0.3654,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7050
|
|
},
|
|
{
|
|
"epoch": 1.6265840003436476,
|
|
"grad_norm": 4.732957363128662,
|
|
"learning_rate": 9.880884317591687e-05,
|
|
"loss": 0.3563,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 1.6380389180830195,
|
|
"grad_norm": 4.353087902069092,
|
|
"learning_rate": 9.876507452452646e-05,
|
|
"loss": 0.3523,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7150
|
|
},
|
|
{
|
|
"epoch": 1.6494938358223914,
|
|
"grad_norm": 5.005238056182861,
|
|
"learning_rate": 9.872052623234632e-05,
|
|
"loss": 0.3402,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 1.6609487535617635,
|
|
"grad_norm": 4.400302410125732,
|
|
"learning_rate": 9.867519901160059e-05,
|
|
"loss": 0.3522,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7250
|
|
},
|
|
{
|
|
"epoch": 1.6724036713011354,
|
|
"grad_norm": 5.095331192016602,
|
|
"learning_rate": 9.862909358696674e-05,
|
|
"loss": 0.3431,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 1.6838585890405073,
|
|
"grad_norm": 4.416248798370361,
|
|
"learning_rate": 9.858221069556395e-05,
|
|
"loss": 0.3373,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7350
|
|
},
|
|
{
|
|
"epoch": 1.6953135067798795,
|
|
"grad_norm": 4.021190166473389,
|
|
"learning_rate": 9.85345510869412e-05,
|
|
"loss": 0.3298,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 1.7067684245192514,
|
|
"grad_norm": 5.18602180480957,
|
|
"learning_rate": 9.848611552306548e-05,
|
|
"loss": 0.3405,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7450
|
|
},
|
|
{
|
|
"epoch": 1.7182233422586233,
|
|
"grad_norm": 4.7404608726501465,
|
|
"learning_rate": 9.843690477830945e-05,
|
|
"loss": 0.3278,
|
|
"memory/device_mem_reserved(gib)": 53.23,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 1.7296782599979954,
|
|
"grad_norm": 5.107292175292969,
|
|
"learning_rate": 9.838691963943912e-05,
|
|
"loss": 0.3351,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7550
|
|
},
|
|
{
|
|
"epoch": 1.7411331777373675,
|
|
"grad_norm": 4.792062759399414,
|
|
"learning_rate": 9.83361609056013e-05,
|
|
"loss": 0.3212,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 1.7525880954767392,
|
|
"grad_norm": 5.694723606109619,
|
|
"learning_rate": 9.82846293883108e-05,
|
|
"loss": 0.3191,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7650
|
|
},
|
|
{
|
|
"epoch": 1.7640430132161113,
|
|
"grad_norm": 4.297928333282471,
|
|
"learning_rate": 9.823232591143741e-05,
|
|
"loss": 0.3096,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 1.7754979309554835,
|
|
"grad_norm": 4.557746887207031,
|
|
"learning_rate": 9.817925131119279e-05,
|
|
"loss": 0.3055,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7750
|
|
},
|
|
{
|
|
"epoch": 1.7869528486948552,
|
|
"grad_norm": 4.228251934051514,
|
|
"learning_rate": 9.81254064361171e-05,
|
|
"loss": 0.3149,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 1.7984077664342273,
|
|
"grad_norm": 4.910319805145264,
|
|
"learning_rate": 9.807079214706538e-05,
|
|
"loss": 0.3141,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7850
|
|
},
|
|
{
|
|
"epoch": 1.8098626841735994,
|
|
"grad_norm": 5.196345329284668,
|
|
"learning_rate": 9.801540931719384e-05,
|
|
"loss": 0.3035,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 1.8213176019129713,
|
|
"grad_norm": 4.111600875854492,
|
|
"learning_rate": 9.795925883194588e-05,
|
|
"loss": 0.3033,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 7950
|
|
},
|
|
{
|
|
"epoch": 1.8327725196523432,
|
|
"grad_norm": 4.21397590637207,
|
|
"learning_rate": 9.790234158903792e-05,
|
|
"loss": 0.3068,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 1.8442274373917154,
|
|
"grad_norm": 4.57835578918457,
|
|
"learning_rate": 9.784465849844511e-05,
|
|
"loss": 0.3,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8050
|
|
},
|
|
{
|
|
"epoch": 1.8556823551310873,
|
|
"grad_norm": 4.8795294761657715,
|
|
"learning_rate": 9.778621048238664e-05,
|
|
"loss": 0.2919,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 1.8671372728704592,
|
|
"grad_norm": 4.112079620361328,
|
|
"learning_rate": 9.77269984753112e-05,
|
|
"loss": 0.2866,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8150
|
|
},
|
|
{
|
|
"epoch": 1.8785921906098313,
|
|
"grad_norm": 5.471593856811523,
|
|
"learning_rate": 9.766702342388184e-05,
|
|
"loss": 0.2942,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 1.8900471083492032,
|
|
"grad_norm": 5.2102766036987305,
|
|
"learning_rate": 9.760628628696096e-05,
|
|
"loss": 0.2926,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8250
|
|
},
|
|
{
|
|
"epoch": 1.901502026088575,
|
|
"grad_norm": 4.992270469665527,
|
|
"learning_rate": 9.754478803559498e-05,
|
|
"loss": 0.2874,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 1.9129569438279472,
|
|
"grad_norm": 4.012945175170898,
|
|
"learning_rate": 9.748252965299872e-05,
|
|
"loss": 0.2774,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8350
|
|
},
|
|
{
|
|
"epoch": 1.9244118615673191,
|
|
"grad_norm": 4.634591102600098,
|
|
"learning_rate": 9.741951213453977e-05,
|
|
"loss": 0.2795,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 1.935866779306691,
|
|
"grad_norm": 4.384332656860352,
|
|
"learning_rate": 9.735573648772257e-05,
|
|
"loss": 0.2785,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8450
|
|
},
|
|
{
|
|
"epoch": 1.9473216970460632,
|
|
"grad_norm": 4.638082504272461,
|
|
"learning_rate": 9.72912037321722e-05,
|
|
"loss": 0.2803,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 1.958776614785435,
|
|
"grad_norm": 3.405381917953491,
|
|
"learning_rate": 9.722591489961827e-05,
|
|
"loss": 0.2729,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8550
|
|
},
|
|
{
|
|
"epoch": 1.970231532524807,
|
|
"grad_norm": 4.394991874694824,
|
|
"learning_rate": 9.715987103387823e-05,
|
|
"loss": 0.2751,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 1.9816864502641791,
|
|
"grad_norm": 5.380841255187988,
|
|
"learning_rate": 9.709307319084077e-05,
|
|
"loss": 0.2725,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8650
|
|
},
|
|
{
|
|
"epoch": 1.993141368003551,
|
|
"grad_norm": 3.7391974925994873,
|
|
"learning_rate": 9.702552243844899e-05,
|
|
"loss": 0.2659,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 2.0045819670957488,
|
|
"grad_norm": 3.6832714080810547,
|
|
"learning_rate": 9.69572198566832e-05,
|
|
"loss": 0.2254,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8750
|
|
},
|
|
{
|
|
"epoch": 2.016036884835121,
|
|
"grad_norm": 3.2387888431549072,
|
|
"learning_rate": 9.68881665375438e-05,
|
|
"loss": 0.1553,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 2.0274918025744926,
|
|
"grad_norm": 3.022691488265991,
|
|
"learning_rate": 9.681836358503367e-05,
|
|
"loss": 0.1662,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8850
|
|
},
|
|
{
|
|
"epoch": 2.0389467203138647,
|
|
"grad_norm": 3.7819292545318604,
|
|
"learning_rate": 9.674781211514063e-05,
|
|
"loss": 0.1651,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 2.050401638053237,
|
|
"grad_norm": 4.307174205780029,
|
|
"learning_rate": 9.667651325581955e-05,
|
|
"loss": 0.1595,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 8950
|
|
},
|
|
{
|
|
"epoch": 2.0618565557926085,
|
|
"grad_norm": 3.7441294193267822,
|
|
"learning_rate": 9.660446814697436e-05,
|
|
"loss": 0.1603,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 2.0733114735319806,
|
|
"grad_norm": 3.3949477672576904,
|
|
"learning_rate": 9.653167794043976e-05,
|
|
"loss": 0.1635,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9050
|
|
},
|
|
{
|
|
"epoch": 2.0847663912713528,
|
|
"grad_norm": 3.6564900875091553,
|
|
"learning_rate": 9.645814379996285e-05,
|
|
"loss": 0.1595,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 2.0962213090107245,
|
|
"grad_norm": 3.380403995513916,
|
|
"learning_rate": 9.638386690118452e-05,
|
|
"loss": 0.1552,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9150
|
|
},
|
|
{
|
|
"epoch": 2.1076762267500966,
|
|
"grad_norm": 3.9699547290802,
|
|
"learning_rate": 9.630884843162063e-05,
|
|
"loss": 0.1603,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 2.1191311444894687,
|
|
"grad_norm": 2.764639139175415,
|
|
"learning_rate": 9.623308959064306e-05,
|
|
"loss": 0.1587,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9250
|
|
},
|
|
{
|
|
"epoch": 2.130586062228841,
|
|
"grad_norm": 3.9039690494537354,
|
|
"learning_rate": 9.615659158946053e-05,
|
|
"loss": 0.1621,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 2.1420409799682125,
|
|
"grad_norm": 3.1429221630096436,
|
|
"learning_rate": 9.607935565109917e-05,
|
|
"loss": 0.161,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9350
|
|
},
|
|
{
|
|
"epoch": 2.1534958977075846,
|
|
"grad_norm": 3.3480520248413086,
|
|
"learning_rate": 9.600138301038311e-05,
|
|
"loss": 0.1645,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 2.1649508154469568,
|
|
"grad_norm": 3.3411660194396973,
|
|
"learning_rate": 9.592267491391452e-05,
|
|
"loss": 0.1637,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9450
|
|
},
|
|
{
|
|
"epoch": 2.1764057331863285,
|
|
"grad_norm": 3.773784637451172,
|
|
"learning_rate": 9.584323262005393e-05,
|
|
"loss": 0.1631,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 2.1878606509257006,
|
|
"grad_norm": 2.9222793579101562,
|
|
"learning_rate": 9.576305739889991e-05,
|
|
"loss": 0.1598,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9550
|
|
},
|
|
{
|
|
"epoch": 2.1993155686650727,
|
|
"grad_norm": 3.034086227416992,
|
|
"learning_rate": 9.568215053226888e-05,
|
|
"loss": 0.1602,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 2.2107704864044444,
|
|
"grad_norm": 4.284358501434326,
|
|
"learning_rate": 9.560051331367457e-05,
|
|
"loss": 0.1624,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9650
|
|
},
|
|
{
|
|
"epoch": 2.2222254041438165,
|
|
"grad_norm": 4.235621929168701,
|
|
"learning_rate": 9.551814704830734e-05,
|
|
"loss": 0.1593,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 2.2336803218831887,
|
|
"grad_norm": 3.487086057662964,
|
|
"learning_rate": 9.543505305301334e-05,
|
|
"loss": 0.155,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9750
|
|
},
|
|
{
|
|
"epoch": 2.2451352396225603,
|
|
"grad_norm": 3.9365508556365967,
|
|
"learning_rate": 9.535123265627343e-05,
|
|
"loss": 0.1608,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 2.2565901573619325,
|
|
"grad_norm": 4.065316200256348,
|
|
"learning_rate": 9.526668719818195e-05,
|
|
"loss": 0.1623,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9850
|
|
},
|
|
{
|
|
"epoch": 2.2680450751013046,
|
|
"grad_norm": 3.1943957805633545,
|
|
"learning_rate": 9.518141803042527e-05,
|
|
"loss": 0.1646,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 2.2794999928406763,
|
|
"grad_norm": 3.362541913986206,
|
|
"learning_rate": 9.509542651626027e-05,
|
|
"loss": 0.1591,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 9950
|
|
},
|
|
{
|
|
"epoch": 2.2909549105800484,
|
|
"grad_norm": 3.442073345184326,
|
|
"learning_rate": 9.500871403049239e-05,
|
|
"loss": 0.1604,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 2.3024098283194205,
|
|
"grad_norm": 3.4276912212371826,
|
|
"learning_rate": 9.492128195945383e-05,
|
|
"loss": 0.1571,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10050
|
|
},
|
|
{
|
|
"epoch": 2.313864746058792,
|
|
"grad_norm": 2.761948347091675,
|
|
"learning_rate": 9.483313170098121e-05,
|
|
"loss": 0.1535,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10100
|
|
},
|
|
{
|
|
"epoch": 2.3253196637981643,
|
|
"grad_norm": 3.1246402263641357,
|
|
"learning_rate": 9.474426466439337e-05,
|
|
"loss": 0.1579,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10150
|
|
},
|
|
{
|
|
"epoch": 2.3367745815375365,
|
|
"grad_norm": 3.328728437423706,
|
|
"learning_rate": 9.465468227046876e-05,
|
|
"loss": 0.1567,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 2.348229499276908,
|
|
"grad_norm": 4.195374965667725,
|
|
"learning_rate": 9.456438595142272e-05,
|
|
"loss": 0.1542,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10250
|
|
},
|
|
{
|
|
"epoch": 2.3596844170162803,
|
|
"grad_norm": 3.6173229217529297,
|
|
"learning_rate": 9.447337715088461e-05,
|
|
"loss": 0.1615,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10300
|
|
},
|
|
{
|
|
"epoch": 2.3711393347556524,
|
|
"grad_norm": 3.0115489959716797,
|
|
"learning_rate": 9.438165732387472e-05,
|
|
"loss": 0.1586,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10350
|
|
},
|
|
{
|
|
"epoch": 2.382594252495024,
|
|
"grad_norm": 4.064676284790039,
|
|
"learning_rate": 9.428922793678101e-05,
|
|
"loss": 0.1551,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10400
|
|
},
|
|
{
|
|
"epoch": 2.3940491702343962,
|
|
"grad_norm": 3.5949292182922363,
|
|
"learning_rate": 9.419609046733571e-05,
|
|
"loss": 0.1502,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10450
|
|
},
|
|
{
|
|
"epoch": 2.4055040879737684,
|
|
"grad_norm": 3.932413101196289,
|
|
"learning_rate": 9.410224640459156e-05,
|
|
"loss": 0.157,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 2.41695900571314,
|
|
"grad_norm": 3.8124208450317383,
|
|
"learning_rate": 9.400769724889817e-05,
|
|
"loss": 0.1495,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10550
|
|
},
|
|
{
|
|
"epoch": 2.428413923452512,
|
|
"grad_norm": 3.310115098953247,
|
|
"learning_rate": 9.391244451187793e-05,
|
|
"loss": 0.1572,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10600
|
|
},
|
|
{
|
|
"epoch": 2.4398688411918843,
|
|
"grad_norm": 3.140340566635132,
|
|
"learning_rate": 9.381648971640184e-05,
|
|
"loss": 0.1544,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10650
|
|
},
|
|
{
|
|
"epoch": 2.451323758931256,
|
|
"grad_norm": 3.2607996463775635,
|
|
"learning_rate": 9.371983439656524e-05,
|
|
"loss": 0.1515,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10700
|
|
},
|
|
{
|
|
"epoch": 2.462778676670628,
|
|
"grad_norm": 3.3957531452178955,
|
|
"learning_rate": 9.362248009766321e-05,
|
|
"loss": 0.1506,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10750
|
|
},
|
|
{
|
|
"epoch": 2.4742335944100002,
|
|
"grad_norm": 3.6932249069213867,
|
|
"learning_rate": 9.35244283761659e-05,
|
|
"loss": 0.1417,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 2.4856885121493724,
|
|
"grad_norm": 2.407801389694214,
|
|
"learning_rate": 9.342568079969363e-05,
|
|
"loss": 0.1507,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10850
|
|
},
|
|
{
|
|
"epoch": 2.497143429888744,
|
|
"grad_norm": 3.5010054111480713,
|
|
"learning_rate": 9.33262389469918e-05,
|
|
"loss": 0.1486,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10900
|
|
},
|
|
{
|
|
"epoch": 2.508598347628116,
|
|
"grad_norm": 3.2884604930877686,
|
|
"learning_rate": 9.322610440790572e-05,
|
|
"loss": 0.1545,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 10950
|
|
},
|
|
{
|
|
"epoch": 2.520053265367488,
|
|
"grad_norm": 3.1958744525909424,
|
|
"learning_rate": 9.312527878335518e-05,
|
|
"loss": 0.1431,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11000
|
|
},
|
|
{
|
|
"epoch": 2.53150818310686,
|
|
"grad_norm": 3.1914916038513184,
|
|
"learning_rate": 9.302376368530874e-05,
|
|
"loss": 0.147,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11050
|
|
},
|
|
{
|
|
"epoch": 2.542963100846232,
|
|
"grad_norm": 2.7763078212738037,
|
|
"learning_rate": 9.292156073675815e-05,
|
|
"loss": 0.1471,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 2.5544180185856042,
|
|
"grad_norm": 3.8447723388671875,
|
|
"learning_rate": 9.281867157169221e-05,
|
|
"loss": 0.1463,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11150
|
|
},
|
|
{
|
|
"epoch": 2.565872936324976,
|
|
"grad_norm": 3.5225303173065186,
|
|
"learning_rate": 9.27150978350708e-05,
|
|
"loss": 0.1462,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11200
|
|
},
|
|
{
|
|
"epoch": 2.577327854064348,
|
|
"grad_norm": 3.2575135231018066,
|
|
"learning_rate": 9.261084118279847e-05,
|
|
"loss": 0.139,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11250
|
|
},
|
|
{
|
|
"epoch": 2.5887827718037197,
|
|
"grad_norm": 3.11187481880188,
|
|
"learning_rate": 9.250590328169807e-05,
|
|
"loss": 0.1423,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11300
|
|
},
|
|
{
|
|
"epoch": 2.600237689543092,
|
|
"grad_norm": 3.156135082244873,
|
|
"learning_rate": 9.240028580948395e-05,
|
|
"loss": 0.1426,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11350
|
|
},
|
|
{
|
|
"epoch": 2.611692607282464,
|
|
"grad_norm": 3.4446299076080322,
|
|
"learning_rate": 9.229399045473532e-05,
|
|
"loss": 0.1459,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 2.623147525021836,
|
|
"grad_norm": 3.1665008068084717,
|
|
"learning_rate": 9.218701891686916e-05,
|
|
"loss": 0.1489,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11450
|
|
},
|
|
{
|
|
"epoch": 2.634602442761208,
|
|
"grad_norm": 2.7036280632019043,
|
|
"learning_rate": 9.207937290611298e-05,
|
|
"loss": 0.1407,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11500
|
|
},
|
|
{
|
|
"epoch": 2.64605736050058,
|
|
"grad_norm": 3.9781899452209473,
|
|
"learning_rate": 9.197105414347762e-05,
|
|
"loss": 0.1476,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11550
|
|
},
|
|
{
|
|
"epoch": 2.6575122782399516,
|
|
"grad_norm": 2.9390923976898193,
|
|
"learning_rate": 9.186206436072965e-05,
|
|
"loss": 0.1369,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11600
|
|
},
|
|
{
|
|
"epoch": 2.6689671959793237,
|
|
"grad_norm": 1.9289586544036865,
|
|
"learning_rate": 9.175240530036369e-05,
|
|
"loss": 0.1363,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11650
|
|
},
|
|
{
|
|
"epoch": 2.680422113718696,
|
|
"grad_norm": 3.644439697265625,
|
|
"learning_rate": 9.164207871557456e-05,
|
|
"loss": 0.1415,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11700
|
|
},
|
|
{
|
|
"epoch": 2.691877031458068,
|
|
"grad_norm": 3.1818296909332275,
|
|
"learning_rate": 9.153108637022928e-05,
|
|
"loss": 0.1371,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11750
|
|
},
|
|
{
|
|
"epoch": 2.7033319491974397,
|
|
"grad_norm": 2.6996982097625732,
|
|
"learning_rate": 9.14194300388388e-05,
|
|
"loss": 0.1409,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11800
|
|
},
|
|
{
|
|
"epoch": 2.714786866936812,
|
|
"grad_norm": 3.8771860599517822,
|
|
"learning_rate": 9.13071115065297e-05,
|
|
"loss": 0.1395,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11850
|
|
},
|
|
{
|
|
"epoch": 2.726241784676184,
|
|
"grad_norm": 3.087873935699463,
|
|
"learning_rate": 9.119413256901563e-05,
|
|
"loss": 0.1374,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11900
|
|
},
|
|
{
|
|
"epoch": 2.7376967024155556,
|
|
"grad_norm": 3.33695650100708,
|
|
"learning_rate": 9.108049503256854e-05,
|
|
"loss": 0.1378,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 11950
|
|
},
|
|
{
|
|
"epoch": 2.7491516201549278,
|
|
"grad_norm": 3.057760715484619,
|
|
"learning_rate": 9.096620071398994e-05,
|
|
"loss": 0.1417,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 2.7606065378943,
|
|
"grad_norm": 4.001928329467773,
|
|
"learning_rate": 9.085125144058168e-05,
|
|
"loss": 0.1405,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12050
|
|
},
|
|
{
|
|
"epoch": 2.7720614556336716,
|
|
"grad_norm": 2.8355178833007812,
|
|
"learning_rate": 9.073564905011689e-05,
|
|
"loss": 0.1426,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12100
|
|
},
|
|
{
|
|
"epoch": 2.7835163733730437,
|
|
"grad_norm": 3.0020503997802734,
|
|
"learning_rate": 9.061939539081049e-05,
|
|
"loss": 0.1386,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12150
|
|
},
|
|
{
|
|
"epoch": 2.794971291112416,
|
|
"grad_norm": 4.463298797607422,
|
|
"learning_rate": 9.05024923212897e-05,
|
|
"loss": 0.1368,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12200
|
|
},
|
|
{
|
|
"epoch": 2.8064262088517875,
|
|
"grad_norm": 3.095207929611206,
|
|
"learning_rate": 9.03849417105643e-05,
|
|
"loss": 0.139,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12250
|
|
},
|
|
{
|
|
"epoch": 2.8178811265911596,
|
|
"grad_norm": 3.377472162246704,
|
|
"learning_rate": 9.026674543799676e-05,
|
|
"loss": 0.1356,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12300
|
|
},
|
|
{
|
|
"epoch": 2.8293360443305318,
|
|
"grad_norm": 3.876528739929199,
|
|
"learning_rate": 9.01479053932722e-05,
|
|
"loss": 0.1356,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12350
|
|
},
|
|
{
|
|
"epoch": 2.840790962069904,
|
|
"grad_norm": 2.9100306034088135,
|
|
"learning_rate": 9.002842347636815e-05,
|
|
"loss": 0.1353,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12400
|
|
},
|
|
{
|
|
"epoch": 2.8522458798092756,
|
|
"grad_norm": 2.7643377780914307,
|
|
"learning_rate": 8.990830159752422e-05,
|
|
"loss": 0.1338,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12450
|
|
},
|
|
{
|
|
"epoch": 2.8637007975486477,
|
|
"grad_norm": 2.872948169708252,
|
|
"learning_rate": 8.978754167721151e-05,
|
|
"loss": 0.1352,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12500
|
|
},
|
|
{
|
|
"epoch": 2.8751557152880194,
|
|
"grad_norm": 3.3348748683929443,
|
|
"learning_rate": 8.96661456461019e-05,
|
|
"loss": 0.1337,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12550
|
|
},
|
|
{
|
|
"epoch": 2.8866106330273915,
|
|
"grad_norm": 2.863382577896118,
|
|
"learning_rate": 8.954411544503729e-05,
|
|
"loss": 0.1291,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12600
|
|
},
|
|
{
|
|
"epoch": 2.8980655507667636,
|
|
"grad_norm": 3.632277250289917,
|
|
"learning_rate": 8.94214530249984e-05,
|
|
"loss": 0.1325,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12650
|
|
},
|
|
{
|
|
"epoch": 2.9095204685061358,
|
|
"grad_norm": 3.788857936859131,
|
|
"learning_rate": 8.929816034707375e-05,
|
|
"loss": 0.1331,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12700
|
|
},
|
|
{
|
|
"epoch": 2.9209753862455075,
|
|
"grad_norm": 2.73443865776062,
|
|
"learning_rate": 8.917423938242814e-05,
|
|
"loss": 0.1322,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12750
|
|
},
|
|
{
|
|
"epoch": 2.9324303039848796,
|
|
"grad_norm": 3.1101582050323486,
|
|
"learning_rate": 8.904969211227134e-05,
|
|
"loss": 0.1274,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12800
|
|
},
|
|
{
|
|
"epoch": 2.9438852217242513,
|
|
"grad_norm": 2.1412153244018555,
|
|
"learning_rate": 8.892452052782616e-05,
|
|
"loss": 0.1363,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12850
|
|
},
|
|
{
|
|
"epoch": 2.9553401394636234,
|
|
"grad_norm": 2.4939417839050293,
|
|
"learning_rate": 8.879872663029689e-05,
|
|
"loss": 0.1317,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12900
|
|
},
|
|
{
|
|
"epoch": 2.9667950572029955,
|
|
"grad_norm": 2.754542589187622,
|
|
"learning_rate": 8.867231243083703e-05,
|
|
"loss": 0.1257,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 12950
|
|
},
|
|
{
|
|
"epoch": 2.9782499749423677,
|
|
"grad_norm": 2.955983877182007,
|
|
"learning_rate": 8.854527995051738e-05,
|
|
"loss": 0.1289,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13000
|
|
},
|
|
{
|
|
"epoch": 2.9897048926817393,
|
|
"grad_norm": 3.313758373260498,
|
|
"learning_rate": 8.841763122029358e-05,
|
|
"loss": 0.1308,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13050
|
|
},
|
|
{
|
|
"epoch": 3.0011454917739373,
|
|
"grad_norm": 1.7117892503738403,
|
|
"learning_rate": 8.828936828097368e-05,
|
|
"loss": 0.1221,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13100
|
|
},
|
|
{
|
|
"epoch": 3.012600409513309,
|
|
"grad_norm": 3.7318451404571533,
|
|
"learning_rate": 8.816049318318552e-05,
|
|
"loss": 0.0704,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13150
|
|
},
|
|
{
|
|
"epoch": 3.024055327252681,
|
|
"grad_norm": 2.1490225791931152,
|
|
"learning_rate": 8.803100798734391e-05,
|
|
"loss": 0.0698,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13200
|
|
},
|
|
{
|
|
"epoch": 3.0355102449920532,
|
|
"grad_norm": 2.4357903003692627,
|
|
"learning_rate": 8.790091476361777e-05,
|
|
"loss": 0.0717,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13250
|
|
},
|
|
{
|
|
"epoch": 3.046965162731425,
|
|
"grad_norm": 3.2305984497070312,
|
|
"learning_rate": 8.777021559189695e-05,
|
|
"loss": 0.0673,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13300
|
|
},
|
|
{
|
|
"epoch": 3.058420080470797,
|
|
"grad_norm": 2.8263580799102783,
|
|
"learning_rate": 8.763891256175902e-05,
|
|
"loss": 0.069,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13350
|
|
},
|
|
{
|
|
"epoch": 3.069874998210169,
|
|
"grad_norm": 3.3232004642486572,
|
|
"learning_rate": 8.750700777243583e-05,
|
|
"loss": 0.0723,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13400
|
|
},
|
|
{
|
|
"epoch": 3.0813299159495413,
|
|
"grad_norm": 2.5803654193878174,
|
|
"learning_rate": 8.737450333277996e-05,
|
|
"loss": 0.068,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13450
|
|
},
|
|
{
|
|
"epoch": 3.092784833688913,
|
|
"grad_norm": 3.2602574825286865,
|
|
"learning_rate": 8.724140136123106e-05,
|
|
"loss": 0.0682,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13500
|
|
},
|
|
{
|
|
"epoch": 3.104239751428285,
|
|
"grad_norm": 3.49511456489563,
|
|
"learning_rate": 8.710770398578189e-05,
|
|
"loss": 0.0744,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13550
|
|
},
|
|
{
|
|
"epoch": 3.1156946691676572,
|
|
"grad_norm": 3.492642879486084,
|
|
"learning_rate": 8.697341334394435e-05,
|
|
"loss": 0.0678,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13600
|
|
},
|
|
{
|
|
"epoch": 3.127149586907029,
|
|
"grad_norm": 2.680922269821167,
|
|
"learning_rate": 8.683853158271532e-05,
|
|
"loss": 0.0682,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13650
|
|
},
|
|
{
|
|
"epoch": 3.138604504646401,
|
|
"grad_norm": 2.501112699508667,
|
|
"learning_rate": 8.670306085854229e-05,
|
|
"loss": 0.0727,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13700
|
|
},
|
|
{
|
|
"epoch": 3.150059422385773,
|
|
"grad_norm": 1.7489196062088013,
|
|
"learning_rate": 8.65670033372889e-05,
|
|
"loss": 0.0706,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13750
|
|
},
|
|
{
|
|
"epoch": 3.161514340125145,
|
|
"grad_norm": 2.4260241985321045,
|
|
"learning_rate": 8.643036119420033e-05,
|
|
"loss": 0.0718,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13800
|
|
},
|
|
{
|
|
"epoch": 3.172969257864517,
|
|
"grad_norm": 3.021453380584717,
|
|
"learning_rate": 8.629313661386856e-05,
|
|
"loss": 0.0723,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13850
|
|
},
|
|
{
|
|
"epoch": 3.184424175603889,
|
|
"grad_norm": 2.5771586894989014,
|
|
"learning_rate": 8.615533179019726e-05,
|
|
"loss": 0.0712,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13900
|
|
},
|
|
{
|
|
"epoch": 3.195879093343261,
|
|
"grad_norm": 3.019286870956421,
|
|
"learning_rate": 8.6016948926367e-05,
|
|
"loss": 0.0705,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 13950
|
|
},
|
|
{
|
|
"epoch": 3.207334011082633,
|
|
"grad_norm": 2.4302775859832764,
|
|
"learning_rate": 8.587799023479982e-05,
|
|
"loss": 0.071,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14000
|
|
},
|
|
{
|
|
"epoch": 3.218788928822005,
|
|
"grad_norm": 1.8431477546691895,
|
|
"learning_rate": 8.573845793712383e-05,
|
|
"loss": 0.0727,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14050
|
|
},
|
|
{
|
|
"epoch": 3.2302438465613768,
|
|
"grad_norm": 2.839580774307251,
|
|
"learning_rate": 8.559835426413794e-05,
|
|
"loss": 0.0739,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14100
|
|
},
|
|
{
|
|
"epoch": 3.241698764300749,
|
|
"grad_norm": 3.9472312927246094,
|
|
"learning_rate": 8.545768145577589e-05,
|
|
"loss": 0.0689,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14150
|
|
},
|
|
{
|
|
"epoch": 3.253153682040121,
|
|
"grad_norm": 2.908961296081543,
|
|
"learning_rate": 8.531644176107066e-05,
|
|
"loss": 0.0701,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14200
|
|
},
|
|
{
|
|
"epoch": 3.2646085997794927,
|
|
"grad_norm": 1.9942492246627808,
|
|
"learning_rate": 8.517463743811836e-05,
|
|
"loss": 0.0708,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14250
|
|
},
|
|
{
|
|
"epoch": 3.276063517518865,
|
|
"grad_norm": 2.883118152618408,
|
|
"learning_rate": 8.503227075404227e-05,
|
|
"loss": 0.0751,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14300
|
|
},
|
|
{
|
|
"epoch": 3.287518435258237,
|
|
"grad_norm": 2.3924851417541504,
|
|
"learning_rate": 8.488934398495649e-05,
|
|
"loss": 0.0725,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14350
|
|
},
|
|
{
|
|
"epoch": 3.2989733529976086,
|
|
"grad_norm": 2.108149766921997,
|
|
"learning_rate": 8.474585941592959e-05,
|
|
"loss": 0.0754,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 3.3104282707369808,
|
|
"grad_norm": 1.8208028078079224,
|
|
"learning_rate": 8.460181934094809e-05,
|
|
"loss": 0.0713,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14450
|
|
},
|
|
{
|
|
"epoch": 3.321883188476353,
|
|
"grad_norm": 2.987584114074707,
|
|
"learning_rate": 8.445722606287971e-05,
|
|
"loss": 0.0727,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14500
|
|
},
|
|
{
|
|
"epoch": 3.3333381062157246,
|
|
"grad_norm": 3.576843023300171,
|
|
"learning_rate": 8.43120818934367e-05,
|
|
"loss": 0.0692,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14550
|
|
},
|
|
{
|
|
"epoch": 3.3447930239550967,
|
|
"grad_norm": 1.5616097450256348,
|
|
"learning_rate": 8.416638915313868e-05,
|
|
"loss": 0.071,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14600
|
|
},
|
|
{
|
|
"epoch": 3.356247941694469,
|
|
"grad_norm": 2.461344003677368,
|
|
"learning_rate": 8.402015017127571e-05,
|
|
"loss": 0.0728,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14650
|
|
},
|
|
{
|
|
"epoch": 3.3677028594338405,
|
|
"grad_norm": 2.740246534347534,
|
|
"learning_rate": 8.387336728587103e-05,
|
|
"loss": 0.0738,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14700
|
|
},
|
|
{
|
|
"epoch": 3.3791577771732126,
|
|
"grad_norm": 2.1253201961517334,
|
|
"learning_rate": 8.372604284364355e-05,
|
|
"loss": 0.0721,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14750
|
|
},
|
|
{
|
|
"epoch": 3.3906126949125848,
|
|
"grad_norm": 2.5474374294281006,
|
|
"learning_rate": 8.357817919997049e-05,
|
|
"loss": 0.0701,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14800
|
|
},
|
|
{
|
|
"epoch": 3.4020676126519565,
|
|
"grad_norm": 1.9206650257110596,
|
|
"learning_rate": 8.34297787188496e-05,
|
|
"loss": 0.0721,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14850
|
|
},
|
|
{
|
|
"epoch": 3.4135225303913286,
|
|
"grad_norm": 2.298408031463623,
|
|
"learning_rate": 8.328084377286149e-05,
|
|
"loss": 0.0719,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14900
|
|
},
|
|
{
|
|
"epoch": 3.4249774481307007,
|
|
"grad_norm": 2.9477977752685547,
|
|
"learning_rate": 8.313137674313158e-05,
|
|
"loss": 0.0724,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 14950
|
|
},
|
|
{
|
|
"epoch": 3.436432365870073,
|
|
"grad_norm": 2.4904532432556152,
|
|
"learning_rate": 8.298138001929206e-05,
|
|
"loss": 0.0726,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 3.4478872836094445,
|
|
"grad_norm": 2.2400805950164795,
|
|
"learning_rate": 8.283085599944376e-05,
|
|
"loss": 0.0713,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15050
|
|
},
|
|
{
|
|
"epoch": 3.4593422013488166,
|
|
"grad_norm": 2.3121421337127686,
|
|
"learning_rate": 8.267980709011769e-05,
|
|
"loss": 0.0668,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15100
|
|
},
|
|
{
|
|
"epoch": 3.4707971190881883,
|
|
"grad_norm": 2.701951026916504,
|
|
"learning_rate": 8.25282357062367e-05,
|
|
"loss": 0.0698,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15150
|
|
},
|
|
{
|
|
"epoch": 3.4822520368275605,
|
|
"grad_norm": 2.5985162258148193,
|
|
"learning_rate": 8.237614427107672e-05,
|
|
"loss": 0.0682,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15200
|
|
},
|
|
{
|
|
"epoch": 3.4937069545669326,
|
|
"grad_norm": 1.998067855834961,
|
|
"learning_rate": 8.222353521622819e-05,
|
|
"loss": 0.0716,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15250
|
|
},
|
|
{
|
|
"epoch": 3.5051618723063047,
|
|
"grad_norm": 2.705017328262329,
|
|
"learning_rate": 8.2070410981557e-05,
|
|
"loss": 0.0687,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15300
|
|
},
|
|
{
|
|
"epoch": 3.5166167900456764,
|
|
"grad_norm": 2.35690975189209,
|
|
"learning_rate": 8.191677401516565e-05,
|
|
"loss": 0.0693,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15350
|
|
},
|
|
{
|
|
"epoch": 3.5280717077850485,
|
|
"grad_norm": 2.5952446460723877,
|
|
"learning_rate": 8.176262677335398e-05,
|
|
"loss": 0.0712,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15400
|
|
},
|
|
{
|
|
"epoch": 3.53952662552442,
|
|
"grad_norm": 2.347503662109375,
|
|
"learning_rate": 8.160797172057998e-05,
|
|
"loss": 0.0724,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15450
|
|
},
|
|
{
|
|
"epoch": 3.5509815432637923,
|
|
"grad_norm": 2.6107993125915527,
|
|
"learning_rate": 8.145281132942037e-05,
|
|
"loss": 0.069,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15500
|
|
},
|
|
{
|
|
"epoch": 3.5624364610031645,
|
|
"grad_norm": 2.2941091060638428,
|
|
"learning_rate": 8.129714808053106e-05,
|
|
"loss": 0.069,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15550
|
|
},
|
|
{
|
|
"epoch": 3.5738913787425366,
|
|
"grad_norm": 3.4392402172088623,
|
|
"learning_rate": 8.114098446260745e-05,
|
|
"loss": 0.072,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15600
|
|
},
|
|
{
|
|
"epoch": 3.5853462964819083,
|
|
"grad_norm": 1.876505732536316,
|
|
"learning_rate": 8.098432297234473e-05,
|
|
"loss": 0.0694,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15650
|
|
},
|
|
{
|
|
"epoch": 3.5968012142212804,
|
|
"grad_norm": 1.9874284267425537,
|
|
"learning_rate": 8.082716611439793e-05,
|
|
"loss": 0.0685,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15700
|
|
},
|
|
{
|
|
"epoch": 3.608256131960652,
|
|
"grad_norm": 2.479461669921875,
|
|
"learning_rate": 8.066951640134181e-05,
|
|
"loss": 0.0696,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15750
|
|
},
|
|
{
|
|
"epoch": 3.619711049700024,
|
|
"grad_norm": 2.318502426147461,
|
|
"learning_rate": 8.051137635363078e-05,
|
|
"loss": 0.0712,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15800
|
|
},
|
|
{
|
|
"epoch": 3.6311659674393963,
|
|
"grad_norm": 2.2743539810180664,
|
|
"learning_rate": 8.035274849955858e-05,
|
|
"loss": 0.066,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15850
|
|
},
|
|
{
|
|
"epoch": 3.6426208851787685,
|
|
"grad_norm": 2.7927591800689697,
|
|
"learning_rate": 8.019363537521781e-05,
|
|
"loss": 0.0722,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15900
|
|
},
|
|
{
|
|
"epoch": 3.65407580291814,
|
|
"grad_norm": 2.3082404136657715,
|
|
"learning_rate": 8.003403952445942e-05,
|
|
"loss": 0.0727,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 15950
|
|
},
|
|
{
|
|
"epoch": 3.6655307206575123,
|
|
"grad_norm": 1.7190062999725342,
|
|
"learning_rate": 7.987396349885207e-05,
|
|
"loss": 0.0688,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 3.6769856383968844,
|
|
"grad_norm": 2.170894145965576,
|
|
"learning_rate": 7.97134098576413e-05,
|
|
"loss": 0.0643,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16050
|
|
},
|
|
{
|
|
"epoch": 3.688440556136256,
|
|
"grad_norm": 2.3685245513916016,
|
|
"learning_rate": 7.955238116770859e-05,
|
|
"loss": 0.0667,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16100
|
|
},
|
|
{
|
|
"epoch": 3.6998954738756282,
|
|
"grad_norm": 2.269733190536499,
|
|
"learning_rate": 7.939088000353038e-05,
|
|
"loss": 0.0653,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16150
|
|
},
|
|
{
|
|
"epoch": 3.7113503916150004,
|
|
"grad_norm": 2.966156005859375,
|
|
"learning_rate": 7.922890894713688e-05,
|
|
"loss": 0.0641,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16200
|
|
},
|
|
{
|
|
"epoch": 3.722805309354372,
|
|
"grad_norm": 2.5244526863098145,
|
|
"learning_rate": 7.906647058807078e-05,
|
|
"loss": 0.0673,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16250
|
|
},
|
|
{
|
|
"epoch": 3.734260227093744,
|
|
"grad_norm": 2.3612561225891113,
|
|
"learning_rate": 7.890356752334585e-05,
|
|
"loss": 0.0682,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16300
|
|
},
|
|
{
|
|
"epoch": 3.7457151448331163,
|
|
"grad_norm": 2.6866989135742188,
|
|
"learning_rate": 7.874020235740544e-05,
|
|
"loss": 0.0689,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16350
|
|
},
|
|
{
|
|
"epoch": 3.757170062572488,
|
|
"grad_norm": 2.266900062561035,
|
|
"learning_rate": 7.857637770208084e-05,
|
|
"loss": 0.0698,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16400
|
|
},
|
|
{
|
|
"epoch": 3.76862498031186,
|
|
"grad_norm": 2.235653877258301,
|
|
"learning_rate": 7.841209617654949e-05,
|
|
"loss": 0.0642,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16450
|
|
},
|
|
{
|
|
"epoch": 3.7800798980512322,
|
|
"grad_norm": 4.613194942474365,
|
|
"learning_rate": 7.824736040729315e-05,
|
|
"loss": 0.0646,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16500
|
|
},
|
|
{
|
|
"epoch": 3.7915348157906044,
|
|
"grad_norm": 1.9603101015090942,
|
|
"learning_rate": 7.808217302805587e-05,
|
|
"loss": 0.0686,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16550
|
|
},
|
|
{
|
|
"epoch": 3.802989733529976,
|
|
"grad_norm": 2.1632003784179688,
|
|
"learning_rate": 7.791653667980191e-05,
|
|
"loss": 0.0663,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16600
|
|
},
|
|
{
|
|
"epoch": 3.814444651269348,
|
|
"grad_norm": 2.5433571338653564,
|
|
"learning_rate": 7.77504540106735e-05,
|
|
"loss": 0.0664,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16650
|
|
},
|
|
{
|
|
"epoch": 3.82589956900872,
|
|
"grad_norm": 3.197382926940918,
|
|
"learning_rate": 7.758392767594853e-05,
|
|
"loss": 0.0679,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16700
|
|
},
|
|
{
|
|
"epoch": 3.837354486748092,
|
|
"grad_norm": 2.555476188659668,
|
|
"learning_rate": 7.741696033799804e-05,
|
|
"loss": 0.0681,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16750
|
|
},
|
|
{
|
|
"epoch": 3.848809404487464,
|
|
"grad_norm": 2.589463233947754,
|
|
"learning_rate": 7.724955466624371e-05,
|
|
"loss": 0.0677,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16800
|
|
},
|
|
{
|
|
"epoch": 3.8602643222268362,
|
|
"grad_norm": 2.2410428524017334,
|
|
"learning_rate": 7.708171333711517e-05,
|
|
"loss": 0.0688,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16850
|
|
},
|
|
{
|
|
"epoch": 3.871719239966208,
|
|
"grad_norm": 2.9268081188201904,
|
|
"learning_rate": 7.69134390340072e-05,
|
|
"loss": 0.0674,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16900
|
|
},
|
|
{
|
|
"epoch": 3.88317415770558,
|
|
"grad_norm": 2.1275105476379395,
|
|
"learning_rate": 7.674473444723684e-05,
|
|
"loss": 0.0677,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 16950
|
|
},
|
|
{
|
|
"epoch": 3.8946290754449517,
|
|
"grad_norm": 1.7868996858596802,
|
|
"learning_rate": 7.657560227400037e-05,
|
|
"loss": 0.0667,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17000
|
|
},
|
|
{
|
|
"epoch": 3.906083993184324,
|
|
"grad_norm": 2.705197811126709,
|
|
"learning_rate": 7.640604521833015e-05,
|
|
"loss": 0.0713,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17050
|
|
},
|
|
{
|
|
"epoch": 3.917538910923696,
|
|
"grad_norm": 1.5226702690124512,
|
|
"learning_rate": 7.62360659910515e-05,
|
|
"loss": 0.067,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17100
|
|
},
|
|
{
|
|
"epoch": 3.928993828663068,
|
|
"grad_norm": 2.7335004806518555,
|
|
"learning_rate": 7.60656673097392e-05,
|
|
"loss": 0.0653,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17150
|
|
},
|
|
{
|
|
"epoch": 3.94044874640244,
|
|
"grad_norm": 2.0359129905700684,
|
|
"learning_rate": 7.589485189867422e-05,
|
|
"loss": 0.067,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17200
|
|
},
|
|
{
|
|
"epoch": 3.951903664141812,
|
|
"grad_norm": 2.2404749393463135,
|
|
"learning_rate": 7.572362248880001e-05,
|
|
"loss": 0.0659,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17250
|
|
},
|
|
{
|
|
"epoch": 3.9633585818811836,
|
|
"grad_norm": 1.9133015871047974,
|
|
"learning_rate": 7.555198181767894e-05,
|
|
"loss": 0.0662,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17300
|
|
},
|
|
{
|
|
"epoch": 3.9748134996205557,
|
|
"grad_norm": 3.204033136367798,
|
|
"learning_rate": 7.537993262944849e-05,
|
|
"loss": 0.0644,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17350
|
|
},
|
|
{
|
|
"epoch": 3.986268417359928,
|
|
"grad_norm": 2.0416345596313477,
|
|
"learning_rate": 7.520747767477734e-05,
|
|
"loss": 0.0648,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17400
|
|
},
|
|
{
|
|
"epoch": 3.9977233350993,
|
|
"grad_norm": 2.1592066287994385,
|
|
"learning_rate": 7.50346197108215e-05,
|
|
"loss": 0.0629,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17450
|
|
},
|
|
{
|
|
"epoch": 4.0091639341914975,
|
|
"grad_norm": 2.386658191680908,
|
|
"learning_rate": 7.486136150118015e-05,
|
|
"loss": 0.0421,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17500
|
|
},
|
|
{
|
|
"epoch": 4.02061885193087,
|
|
"grad_norm": 1.3900179862976074,
|
|
"learning_rate": 7.468770581585146e-05,
|
|
"loss": 0.0324,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17550
|
|
},
|
|
{
|
|
"epoch": 4.032073769670242,
|
|
"grad_norm": 1.8588780164718628,
|
|
"learning_rate": 7.451365543118831e-05,
|
|
"loss": 0.0354,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17600
|
|
},
|
|
{
|
|
"epoch": 4.043528687409614,
|
|
"grad_norm": 1.4627822637557983,
|
|
"learning_rate": 7.433921312985393e-05,
|
|
"loss": 0.0328,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17650
|
|
},
|
|
{
|
|
"epoch": 4.054983605148985,
|
|
"grad_norm": 2.9422807693481445,
|
|
"learning_rate": 7.416438170077738e-05,
|
|
"loss": 0.0349,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17700
|
|
},
|
|
{
|
|
"epoch": 4.066438522888357,
|
|
"grad_norm": 1.9216961860656738,
|
|
"learning_rate": 7.398916393910895e-05,
|
|
"loss": 0.0364,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17750
|
|
},
|
|
{
|
|
"epoch": 4.077893440627729,
|
|
"grad_norm": 1.9999079704284668,
|
|
"learning_rate": 7.381356264617557e-05,
|
|
"loss": 0.0351,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17800
|
|
},
|
|
{
|
|
"epoch": 4.0893483583671015,
|
|
"grad_norm": 1.1669881343841553,
|
|
"learning_rate": 7.363758062943587e-05,
|
|
"loss": 0.0351,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17850
|
|
},
|
|
{
|
|
"epoch": 4.100803276106474,
|
|
"grad_norm": 1.4963182210922241,
|
|
"learning_rate": 7.346122070243539e-05,
|
|
"loss": 0.0351,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17900
|
|
},
|
|
{
|
|
"epoch": 4.112258193845846,
|
|
"grad_norm": 2.435983419418335,
|
|
"learning_rate": 7.328448568476163e-05,
|
|
"loss": 0.0353,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 17950
|
|
},
|
|
{
|
|
"epoch": 4.123713111585217,
|
|
"grad_norm": 1.783022403717041,
|
|
"learning_rate": 7.310737840199885e-05,
|
|
"loss": 0.0343,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18000
|
|
},
|
|
{
|
|
"epoch": 4.135168029324589,
|
|
"grad_norm": 1.7959028482437134,
|
|
"learning_rate": 7.292990168568302e-05,
|
|
"loss": 0.0344,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18050
|
|
},
|
|
{
|
|
"epoch": 4.146622947063961,
|
|
"grad_norm": 1.0920823812484741,
|
|
"learning_rate": 7.275205837325649e-05,
|
|
"loss": 0.0352,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18100
|
|
},
|
|
{
|
|
"epoch": 4.158077864803333,
|
|
"grad_norm": 2.1539368629455566,
|
|
"learning_rate": 7.257385130802261e-05,
|
|
"loss": 0.0362,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18150
|
|
},
|
|
{
|
|
"epoch": 4.1695327825427055,
|
|
"grad_norm": 2.0688672065734863,
|
|
"learning_rate": 7.239528333910031e-05,
|
|
"loss": 0.0358,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18200
|
|
},
|
|
{
|
|
"epoch": 4.180987700282078,
|
|
"grad_norm": 2.0575592517852783,
|
|
"learning_rate": 7.221635732137854e-05,
|
|
"loss": 0.037,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18250
|
|
},
|
|
{
|
|
"epoch": 4.192442618021449,
|
|
"grad_norm": 2.307478189468384,
|
|
"learning_rate": 7.203707611547066e-05,
|
|
"loss": 0.0383,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18300
|
|
},
|
|
{
|
|
"epoch": 4.203897535760821,
|
|
"grad_norm": 1.4493507146835327,
|
|
"learning_rate": 7.185744258766858e-05,
|
|
"loss": 0.0368,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18350
|
|
},
|
|
{
|
|
"epoch": 4.215352453500193,
|
|
"grad_norm": 1.858702301979065,
|
|
"learning_rate": 7.167745960989708e-05,
|
|
"loss": 0.0371,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18400
|
|
},
|
|
{
|
|
"epoch": 4.226807371239565,
|
|
"grad_norm": 2.091564893722534,
|
|
"learning_rate": 7.149713005966784e-05,
|
|
"loss": 0.037,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18450
|
|
},
|
|
{
|
|
"epoch": 4.238262288978937,
|
|
"grad_norm": 1.320420503616333,
|
|
"learning_rate": 7.13164568200334e-05,
|
|
"loss": 0.0395,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18500
|
|
},
|
|
{
|
|
"epoch": 4.2497172067183095,
|
|
"grad_norm": 1.7669836282730103,
|
|
"learning_rate": 7.113544277954116e-05,
|
|
"loss": 0.036,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18550
|
|
},
|
|
{
|
|
"epoch": 4.261172124457682,
|
|
"grad_norm": 1.7692891359329224,
|
|
"learning_rate": 7.095409083218705e-05,
|
|
"loss": 0.0363,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18600
|
|
},
|
|
{
|
|
"epoch": 4.272627042197053,
|
|
"grad_norm": 1.4716825485229492,
|
|
"learning_rate": 7.077240387736943e-05,
|
|
"loss": 0.0387,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18650
|
|
},
|
|
{
|
|
"epoch": 4.284081959936425,
|
|
"grad_norm": 1.9312763214111328,
|
|
"learning_rate": 7.05903848198426e-05,
|
|
"loss": 0.0351,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18700
|
|
},
|
|
{
|
|
"epoch": 4.295536877675797,
|
|
"grad_norm": 1.417018175125122,
|
|
"learning_rate": 7.040803656967045e-05,
|
|
"loss": 0.0364,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18750
|
|
},
|
|
{
|
|
"epoch": 4.306991795415169,
|
|
"grad_norm": 2.400550365447998,
|
|
"learning_rate": 7.022536204217989e-05,
|
|
"loss": 0.0363,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18800
|
|
},
|
|
{
|
|
"epoch": 4.318446713154541,
|
|
"grad_norm": 1.612289547920227,
|
|
"learning_rate": 7.004236415791421e-05,
|
|
"loss": 0.0371,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18850
|
|
},
|
|
{
|
|
"epoch": 4.3299016308939136,
|
|
"grad_norm": 2.4686686992645264,
|
|
"learning_rate": 6.985904584258649e-05,
|
|
"loss": 0.0401,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18900
|
|
},
|
|
{
|
|
"epoch": 4.341356548633285,
|
|
"grad_norm": 3.242429256439209,
|
|
"learning_rate": 6.967541002703274e-05,
|
|
"loss": 0.0353,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 18950
|
|
},
|
|
{
|
|
"epoch": 4.352811466372657,
|
|
"grad_norm": 2.2859609127044678,
|
|
"learning_rate": 6.949145964716505e-05,
|
|
"loss": 0.0365,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19000
|
|
},
|
|
{
|
|
"epoch": 4.364266384112029,
|
|
"grad_norm": 2.1360576152801514,
|
|
"learning_rate": 6.930719764392466e-05,
|
|
"loss": 0.0382,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19050
|
|
},
|
|
{
|
|
"epoch": 4.375721301851401,
|
|
"grad_norm": 1.6462370157241821,
|
|
"learning_rate": 6.912262696323497e-05,
|
|
"loss": 0.0358,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19100
|
|
},
|
|
{
|
|
"epoch": 4.387176219590773,
|
|
"grad_norm": 1.5075321197509766,
|
|
"learning_rate": 6.893775055595442e-05,
|
|
"loss": 0.0356,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19150
|
|
},
|
|
{
|
|
"epoch": 4.398631137330145,
|
|
"grad_norm": 1.614206075668335,
|
|
"learning_rate": 6.87525713778293e-05,
|
|
"loss": 0.0392,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 4.410086055069517,
|
|
"grad_norm": 1.9505984783172607,
|
|
"learning_rate": 6.856709238944649e-05,
|
|
"loss": 0.0354,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19250
|
|
},
|
|
{
|
|
"epoch": 4.421540972808889,
|
|
"grad_norm": 1.831098198890686,
|
|
"learning_rate": 6.838131655618618e-05,
|
|
"loss": 0.0355,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19300
|
|
},
|
|
{
|
|
"epoch": 4.432995890548261,
|
|
"grad_norm": 2.2867400646209717,
|
|
"learning_rate": 6.819524684817438e-05,
|
|
"loss": 0.037,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19350
|
|
},
|
|
{
|
|
"epoch": 4.444450808287633,
|
|
"grad_norm": 1.2839210033416748,
|
|
"learning_rate": 6.800888624023553e-05,
|
|
"loss": 0.0375,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19400
|
|
},
|
|
{
|
|
"epoch": 4.455905726027005,
|
|
"grad_norm": 1.812117099761963,
|
|
"learning_rate": 6.782223771184484e-05,
|
|
"loss": 0.0365,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19450
|
|
},
|
|
{
|
|
"epoch": 4.467360643766377,
|
|
"grad_norm": 1.3475086688995361,
|
|
"learning_rate": 6.763530424708072e-05,
|
|
"loss": 0.0356,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19500
|
|
},
|
|
{
|
|
"epoch": 4.4788155615057486,
|
|
"grad_norm": 1.6308741569519043,
|
|
"learning_rate": 6.744808883457707e-05,
|
|
"loss": 0.0367,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19550
|
|
},
|
|
{
|
|
"epoch": 4.490270479245121,
|
|
"grad_norm": 1.424625039100647,
|
|
"learning_rate": 6.726059446747545e-05,
|
|
"loss": 0.0384,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19600
|
|
},
|
|
{
|
|
"epoch": 4.501725396984493,
|
|
"grad_norm": 2.242457389831543,
|
|
"learning_rate": 6.707282414337728e-05,
|
|
"loss": 0.0352,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19650
|
|
},
|
|
{
|
|
"epoch": 4.513180314723865,
|
|
"grad_norm": 2.116205930709839,
|
|
"learning_rate": 6.688478086429589e-05,
|
|
"loss": 0.0374,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19700
|
|
},
|
|
{
|
|
"epoch": 4.524635232463237,
|
|
"grad_norm": 1.493812084197998,
|
|
"learning_rate": 6.669646763660855e-05,
|
|
"loss": 0.0339,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19750
|
|
},
|
|
{
|
|
"epoch": 4.536090150202609,
|
|
"grad_norm": 1.5812180042266846,
|
|
"learning_rate": 6.650788747100832e-05,
|
|
"loss": 0.0375,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19800
|
|
},
|
|
{
|
|
"epoch": 4.54754506794198,
|
|
"grad_norm": 1.9899191856384277,
|
|
"learning_rate": 6.631904338245607e-05,
|
|
"loss": 0.0373,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19850
|
|
},
|
|
{
|
|
"epoch": 4.558999985681353,
|
|
"grad_norm": 1.682928442955017,
|
|
"learning_rate": 6.612993839013211e-05,
|
|
"loss": 0.0363,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19900
|
|
},
|
|
{
|
|
"epoch": 4.570454903420725,
|
|
"grad_norm": 1.5727615356445312,
|
|
"learning_rate": 6.594057551738803e-05,
|
|
"loss": 0.0368,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 19950
|
|
},
|
|
{
|
|
"epoch": 4.581909821160097,
|
|
"grad_norm": 1.2249151468276978,
|
|
"learning_rate": 6.575095779169836e-05,
|
|
"loss": 0.0374,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 4.593364738899469,
|
|
"grad_norm": 1.8625729084014893,
|
|
"learning_rate": 6.556108824461206e-05,
|
|
"loss": 0.0356,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20050
|
|
},
|
|
{
|
|
"epoch": 4.604819656638841,
|
|
"grad_norm": 1.3668529987335205,
|
|
"learning_rate": 6.537096991170423e-05,
|
|
"loss": 0.0331,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20100
|
|
},
|
|
{
|
|
"epoch": 4.616274574378213,
|
|
"grad_norm": 1.388374924659729,
|
|
"learning_rate": 6.518060583252741e-05,
|
|
"loss": 0.0355,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20150
|
|
},
|
|
{
|
|
"epoch": 4.627729492117584,
|
|
"grad_norm": 2.348038673400879,
|
|
"learning_rate": 6.498999905056309e-05,
|
|
"loss": 0.0369,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20200
|
|
},
|
|
{
|
|
"epoch": 4.639184409856957,
|
|
"grad_norm": 1.701794147491455,
|
|
"learning_rate": 6.479915261317298e-05,
|
|
"loss": 0.0351,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20250
|
|
},
|
|
{
|
|
"epoch": 4.650639327596329,
|
|
"grad_norm": 1.3405938148498535,
|
|
"learning_rate": 6.460806957155037e-05,
|
|
"loss": 0.0355,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20300
|
|
},
|
|
{
|
|
"epoch": 4.662094245335701,
|
|
"grad_norm": 1.725538730621338,
|
|
"learning_rate": 6.441675298067128e-05,
|
|
"loss": 0.0348,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20350
|
|
},
|
|
{
|
|
"epoch": 4.673549163075073,
|
|
"grad_norm": 1.583162784576416,
|
|
"learning_rate": 6.422520589924564e-05,
|
|
"loss": 0.0344,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20400
|
|
},
|
|
{
|
|
"epoch": 4.685004080814444,
|
|
"grad_norm": 1.4338629245758057,
|
|
"learning_rate": 6.403343138966841e-05,
|
|
"loss": 0.0353,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20450
|
|
},
|
|
{
|
|
"epoch": 4.696458998553816,
|
|
"grad_norm": 2.6246755123138428,
|
|
"learning_rate": 6.384143251797056e-05,
|
|
"loss": 0.0363,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20500
|
|
},
|
|
{
|
|
"epoch": 4.7079139162931884,
|
|
"grad_norm": 2.1834542751312256,
|
|
"learning_rate": 6.364921235377016e-05,
|
|
"loss": 0.0343,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20550
|
|
},
|
|
{
|
|
"epoch": 4.719368834032561,
|
|
"grad_norm": 1.6738187074661255,
|
|
"learning_rate": 6.345677397022315e-05,
|
|
"loss": 0.0351,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20600
|
|
},
|
|
{
|
|
"epoch": 4.730823751771933,
|
|
"grad_norm": 1.6495721340179443,
|
|
"learning_rate": 6.326412044397438e-05,
|
|
"loss": 0.0366,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20650
|
|
},
|
|
{
|
|
"epoch": 4.742278669511305,
|
|
"grad_norm": 1.7878650426864624,
|
|
"learning_rate": 6.307125485510828e-05,
|
|
"loss": 0.0338,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20700
|
|
},
|
|
{
|
|
"epoch": 4.753733587250677,
|
|
"grad_norm": 2.035374641418457,
|
|
"learning_rate": 6.287818028709967e-05,
|
|
"loss": 0.0371,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20750
|
|
},
|
|
{
|
|
"epoch": 4.765188504990048,
|
|
"grad_norm": 2.248223304748535,
|
|
"learning_rate": 6.268489982676446e-05,
|
|
"loss": 0.0374,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20800
|
|
},
|
|
{
|
|
"epoch": 4.77664342272942,
|
|
"grad_norm": 2.056480646133423,
|
|
"learning_rate": 6.249141656421035e-05,
|
|
"loss": 0.0353,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20850
|
|
},
|
|
{
|
|
"epoch": 4.7880983404687925,
|
|
"grad_norm": 1.4961349964141846,
|
|
"learning_rate": 6.229773359278735e-05,
|
|
"loss": 0.037,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20900
|
|
},
|
|
{
|
|
"epoch": 4.799553258208165,
|
|
"grad_norm": 1.207465410232544,
|
|
"learning_rate": 6.210385400903836e-05,
|
|
"loss": 0.0344,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 20950
|
|
},
|
|
{
|
|
"epoch": 4.811008175947537,
|
|
"grad_norm": 1.933811902999878,
|
|
"learning_rate": 6.190978091264959e-05,
|
|
"loss": 0.0338,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21000
|
|
},
|
|
{
|
|
"epoch": 4.822463093686909,
|
|
"grad_norm": 1.5286064147949219,
|
|
"learning_rate": 6.171551740640115e-05,
|
|
"loss": 0.033,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21050
|
|
},
|
|
{
|
|
"epoch": 4.83391801142628,
|
|
"grad_norm": 1.4746378660202026,
|
|
"learning_rate": 6.152106659611736e-05,
|
|
"loss": 0.035,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21100
|
|
},
|
|
{
|
|
"epoch": 4.845372929165652,
|
|
"grad_norm": 1.964225172996521,
|
|
"learning_rate": 6.132643159061707e-05,
|
|
"loss": 0.0336,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21150
|
|
},
|
|
{
|
|
"epoch": 4.856827846905024,
|
|
"grad_norm": 1.239408016204834,
|
|
"learning_rate": 6.1131615501664e-05,
|
|
"loss": 0.0321,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21200
|
|
},
|
|
{
|
|
"epoch": 4.8682827646443965,
|
|
"grad_norm": 2.219224452972412,
|
|
"learning_rate": 6.093662144391695e-05,
|
|
"loss": 0.0371,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21250
|
|
},
|
|
{
|
|
"epoch": 4.879737682383769,
|
|
"grad_norm": 1.2696152925491333,
|
|
"learning_rate": 6.074145253488006e-05,
|
|
"loss": 0.0338,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21300
|
|
},
|
|
{
|
|
"epoch": 4.891192600123141,
|
|
"grad_norm": 0.5583789944648743,
|
|
"learning_rate": 6.054611189485293e-05,
|
|
"loss": 0.0351,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21350
|
|
},
|
|
{
|
|
"epoch": 4.902647517862512,
|
|
"grad_norm": 1.4981776475906372,
|
|
"learning_rate": 6.035060264688075e-05,
|
|
"loss": 0.0321,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21400
|
|
},
|
|
{
|
|
"epoch": 4.914102435601884,
|
|
"grad_norm": 1.6405904293060303,
|
|
"learning_rate": 6.0154927916704304e-05,
|
|
"loss": 0.0339,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21450
|
|
},
|
|
{
|
|
"epoch": 4.925557353341256,
|
|
"grad_norm": 1.264320731163025,
|
|
"learning_rate": 5.9959090832710155e-05,
|
|
"loss": 0.0319,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21500
|
|
},
|
|
{
|
|
"epoch": 4.937012271080628,
|
|
"grad_norm": 2.039963722229004,
|
|
"learning_rate": 5.9763094525880426e-05,
|
|
"loss": 0.0344,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21550
|
|
},
|
|
{
|
|
"epoch": 4.9484671888200005,
|
|
"grad_norm": 1.5706747770309448,
|
|
"learning_rate": 5.956694212974292e-05,
|
|
"loss": 0.0334,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21600
|
|
},
|
|
{
|
|
"epoch": 4.959922106559373,
|
|
"grad_norm": 2.058473587036133,
|
|
"learning_rate": 5.937063678032093e-05,
|
|
"loss": 0.0335,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21650
|
|
},
|
|
{
|
|
"epoch": 4.971377024298745,
|
|
"grad_norm": 1.5394372940063477,
|
|
"learning_rate": 5.9174181616083066e-05,
|
|
"loss": 0.0337,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21700
|
|
},
|
|
{
|
|
"epoch": 4.982831942038116,
|
|
"grad_norm": 2.087599992752075,
|
|
"learning_rate": 5.89775797778932e-05,
|
|
"loss": 0.0341,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21750
|
|
},
|
|
{
|
|
"epoch": 4.994286859777488,
|
|
"grad_norm": 1.8887306451797485,
|
|
"learning_rate": 5.878083440896015e-05,
|
|
"loss": 0.0327,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21800
|
|
},
|
|
{
|
|
"epoch": 5.005727458869686,
|
|
"grad_norm": 1.0491043329238892,
|
|
"learning_rate": 5.858394865478745e-05,
|
|
"loss": 0.0263,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21850
|
|
},
|
|
{
|
|
"epoch": 5.017182376609058,
|
|
"grad_norm": 1.3754074573516846,
|
|
"learning_rate": 5.8386925663123104e-05,
|
|
"loss": 0.0157,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21900
|
|
},
|
|
{
|
|
"epoch": 5.02863729434843,
|
|
"grad_norm": 0.8756074905395508,
|
|
"learning_rate": 5.818976858390918e-05,
|
|
"loss": 0.0184,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 21950
|
|
},
|
|
{
|
|
"epoch": 5.040092212087802,
|
|
"grad_norm": 0.9743272066116333,
|
|
"learning_rate": 5.7992480569231514e-05,
|
|
"loss": 0.018,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22000
|
|
},
|
|
{
|
|
"epoch": 5.051547129827174,
|
|
"grad_norm": 1.1601800918579102,
|
|
"learning_rate": 5.779506477326933e-05,
|
|
"loss": 0.0177,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22050
|
|
},
|
|
{
|
|
"epoch": 5.063002047566546,
|
|
"grad_norm": 1.3135687112808228,
|
|
"learning_rate": 5.7597524352244734e-05,
|
|
"loss": 0.0191,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22100
|
|
},
|
|
{
|
|
"epoch": 5.0744569653059175,
|
|
"grad_norm": 1.3936012983322144,
|
|
"learning_rate": 5.7399862464372324e-05,
|
|
"loss": 0.0184,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22150
|
|
},
|
|
{
|
|
"epoch": 5.08591188304529,
|
|
"grad_norm": 0.9932096600532532,
|
|
"learning_rate": 5.720208226980864e-05,
|
|
"loss": 0.0186,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22200
|
|
},
|
|
{
|
|
"epoch": 5.097366800784662,
|
|
"grad_norm": 1.0546112060546875,
|
|
"learning_rate": 5.700418693060173e-05,
|
|
"loss": 0.0194,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22250
|
|
},
|
|
{
|
|
"epoch": 5.108821718524034,
|
|
"grad_norm": 0.8949224948883057,
|
|
"learning_rate": 5.6806179610640486e-05,
|
|
"loss": 0.0187,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22300
|
|
},
|
|
{
|
|
"epoch": 5.120276636263406,
|
|
"grad_norm": 0.9786812663078308,
|
|
"learning_rate": 5.660806347560416e-05,
|
|
"loss": 0.0176,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22350
|
|
},
|
|
{
|
|
"epoch": 5.131731554002778,
|
|
"grad_norm": 0.9927299618721008,
|
|
"learning_rate": 5.6409841692911625e-05,
|
|
"loss": 0.0195,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22400
|
|
},
|
|
{
|
|
"epoch": 5.143186471742149,
|
|
"grad_norm": 1.1186367273330688,
|
|
"learning_rate": 5.621151743167091e-05,
|
|
"loss": 0.0189,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22450
|
|
},
|
|
{
|
|
"epoch": 5.1546413894815215,
|
|
"grad_norm": 1.2558495998382568,
|
|
"learning_rate": 5.60130938626284e-05,
|
|
"loss": 0.0195,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22500
|
|
},
|
|
{
|
|
"epoch": 5.166096307220894,
|
|
"grad_norm": 1.2264378070831299,
|
|
"learning_rate": 5.581457415811815e-05,
|
|
"loss": 0.0198,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22550
|
|
},
|
|
{
|
|
"epoch": 5.177551224960266,
|
|
"grad_norm": 1.243323564529419,
|
|
"learning_rate": 5.561596149201127e-05,
|
|
"loss": 0.0187,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22600
|
|
},
|
|
{
|
|
"epoch": 5.189006142699638,
|
|
"grad_norm": 0.641426682472229,
|
|
"learning_rate": 5.541725903966504e-05,
|
|
"loss": 0.0183,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22650
|
|
},
|
|
{
|
|
"epoch": 5.20046106043901,
|
|
"grad_norm": 0.5407239198684692,
|
|
"learning_rate": 5.521846997787223e-05,
|
|
"loss": 0.019,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22700
|
|
},
|
|
{
|
|
"epoch": 5.211915978178382,
|
|
"grad_norm": 1.1588449478149414,
|
|
"learning_rate": 5.501959748481035e-05,
|
|
"loss": 0.0203,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22750
|
|
},
|
|
{
|
|
"epoch": 5.223370895917753,
|
|
"grad_norm": 1.5353953838348389,
|
|
"learning_rate": 5.482064473999071e-05,
|
|
"loss": 0.0197,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22800
|
|
},
|
|
{
|
|
"epoch": 5.2348258136571255,
|
|
"grad_norm": 1.5715053081512451,
|
|
"learning_rate": 5.462161492420772e-05,
|
|
"loss": 0.0205,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22850
|
|
},
|
|
{
|
|
"epoch": 5.246280731396498,
|
|
"grad_norm": 0.8576170206069946,
|
|
"learning_rate": 5.442251121948793e-05,
|
|
"loss": 0.0198,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22900
|
|
},
|
|
{
|
|
"epoch": 5.25773564913587,
|
|
"grad_norm": 1.722284197807312,
|
|
"learning_rate": 5.422333680903921e-05,
|
|
"loss": 0.0194,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 22950
|
|
},
|
|
{
|
|
"epoch": 5.269190566875242,
|
|
"grad_norm": 1.3785938024520874,
|
|
"learning_rate": 5.4024094877199884e-05,
|
|
"loss": 0.0204,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23000
|
|
},
|
|
{
|
|
"epoch": 5.280645484614614,
|
|
"grad_norm": 0.8565208911895752,
|
|
"learning_rate": 5.382478860938776e-05,
|
|
"loss": 0.0187,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23050
|
|
},
|
|
{
|
|
"epoch": 5.292100402353985,
|
|
"grad_norm": 1.519986629486084,
|
|
"learning_rate": 5.362542119204924e-05,
|
|
"loss": 0.0204,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23100
|
|
},
|
|
{
|
|
"epoch": 5.303555320093357,
|
|
"grad_norm": 0.8362240791320801,
|
|
"learning_rate": 5.3425995812608355e-05,
|
|
"loss": 0.0188,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23150
|
|
},
|
|
{
|
|
"epoch": 5.3150102378327295,
|
|
"grad_norm": 1.1630821228027344,
|
|
"learning_rate": 5.3226515659415824e-05,
|
|
"loss": 0.0193,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23200
|
|
},
|
|
{
|
|
"epoch": 5.326465155572102,
|
|
"grad_norm": 0.8801319599151611,
|
|
"learning_rate": 5.302698392169806e-05,
|
|
"loss": 0.0179,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23250
|
|
},
|
|
{
|
|
"epoch": 5.337920073311474,
|
|
"grad_norm": 1.0547822713851929,
|
|
"learning_rate": 5.2827403789506234e-05,
|
|
"loss": 0.0203,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23300
|
|
},
|
|
{
|
|
"epoch": 5.349374991050846,
|
|
"grad_norm": 1.2050660848617554,
|
|
"learning_rate": 5.262777845366515e-05,
|
|
"loss": 0.0189,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23350
|
|
},
|
|
{
|
|
"epoch": 5.360829908790217,
|
|
"grad_norm": 1.1393359899520874,
|
|
"learning_rate": 5.242811110572242e-05,
|
|
"loss": 0.0199,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23400
|
|
},
|
|
{
|
|
"epoch": 5.372284826529589,
|
|
"grad_norm": 0.7587368488311768,
|
|
"learning_rate": 5.2228404937897235e-05,
|
|
"loss": 0.0182,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23450
|
|
},
|
|
{
|
|
"epoch": 5.383739744268961,
|
|
"grad_norm": 1.5062503814697266,
|
|
"learning_rate": 5.20286631430295e-05,
|
|
"loss": 0.0202,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23500
|
|
},
|
|
{
|
|
"epoch": 5.3951946620083335,
|
|
"grad_norm": 0.8290795087814331,
|
|
"learning_rate": 5.1828888914528674e-05,
|
|
"loss": 0.0197,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23550
|
|
},
|
|
{
|
|
"epoch": 5.406649579747706,
|
|
"grad_norm": 1.096450924873352,
|
|
"learning_rate": 5.162908544632274e-05,
|
|
"loss": 0.0194,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23600
|
|
},
|
|
{
|
|
"epoch": 5.418104497487078,
|
|
"grad_norm": 0.8339506387710571,
|
|
"learning_rate": 5.142925593280722e-05,
|
|
"loss": 0.0206,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23650
|
|
},
|
|
{
|
|
"epoch": 5.429559415226449,
|
|
"grad_norm": 1.8221694231033325,
|
|
"learning_rate": 5.1229403568793963e-05,
|
|
"loss": 0.02,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23700
|
|
},
|
|
{
|
|
"epoch": 5.441014332965821,
|
|
"grad_norm": 2.2157158851623535,
|
|
"learning_rate": 5.1029531549460205e-05,
|
|
"loss": 0.0208,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23750
|
|
},
|
|
{
|
|
"epoch": 5.452469250705193,
|
|
"grad_norm": 1.0183664560317993,
|
|
"learning_rate": 5.0829643070297415e-05,
|
|
"loss": 0.0192,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23800
|
|
},
|
|
{
|
|
"epoch": 5.463924168444565,
|
|
"grad_norm": 0.8894338011741638,
|
|
"learning_rate": 5.062974132706016e-05,
|
|
"loss": 0.0188,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23850
|
|
},
|
|
{
|
|
"epoch": 5.4753790861839375,
|
|
"grad_norm": 1.4340115785598755,
|
|
"learning_rate": 5.042982951571515e-05,
|
|
"loss": 0.0188,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23900
|
|
},
|
|
{
|
|
"epoch": 5.48683400392331,
|
|
"grad_norm": 1.343416690826416,
|
|
"learning_rate": 5.022991083239002e-05,
|
|
"loss": 0.0204,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 23950
|
|
},
|
|
{
|
|
"epoch": 5.498288921662681,
|
|
"grad_norm": 1.0022575855255127,
|
|
"learning_rate": 5.0029988473322256e-05,
|
|
"loss": 0.0196,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24000
|
|
},
|
|
{
|
|
"epoch": 5.509743839402053,
|
|
"grad_norm": 1.0293878316879272,
|
|
"learning_rate": 4.9830065634808144e-05,
|
|
"loss": 0.0185,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24050
|
|
},
|
|
{
|
|
"epoch": 5.521198757141425,
|
|
"grad_norm": 1.5076055526733398,
|
|
"learning_rate": 4.963014551315163e-05,
|
|
"loss": 0.018,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24100
|
|
},
|
|
{
|
|
"epoch": 5.532653674880797,
|
|
"grad_norm": 1.333103895187378,
|
|
"learning_rate": 4.943023130461317e-05,
|
|
"loss": 0.0189,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24150
|
|
},
|
|
{
|
|
"epoch": 5.544108592620169,
|
|
"grad_norm": 1.848940134048462,
|
|
"learning_rate": 4.9230326205358794e-05,
|
|
"loss": 0.0191,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24200
|
|
},
|
|
{
|
|
"epoch": 5.5555635103595415,
|
|
"grad_norm": 1.3931152820587158,
|
|
"learning_rate": 4.903043341140879e-05,
|
|
"loss": 0.0199,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24250
|
|
},
|
|
{
|
|
"epoch": 5.567018428098914,
|
|
"grad_norm": 1.2779439687728882,
|
|
"learning_rate": 4.883055611858676e-05,
|
|
"loss": 0.0181,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24300
|
|
},
|
|
{
|
|
"epoch": 5.578473345838285,
|
|
"grad_norm": 1.221993088722229,
|
|
"learning_rate": 4.8630697522468455e-05,
|
|
"loss": 0.0201,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24350
|
|
},
|
|
{
|
|
"epoch": 5.589928263577657,
|
|
"grad_norm": 1.5948313474655151,
|
|
"learning_rate": 4.8430860818330756e-05,
|
|
"loss": 0.0192,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24400
|
|
},
|
|
{
|
|
"epoch": 5.601383181317029,
|
|
"grad_norm": 1.4784343242645264,
|
|
"learning_rate": 4.823104920110049e-05,
|
|
"loss": 0.0195,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24450
|
|
},
|
|
{
|
|
"epoch": 5.612838099056401,
|
|
"grad_norm": 1.4098366498947144,
|
|
"learning_rate": 4.8031265865303434e-05,
|
|
"loss": 0.0201,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24500
|
|
},
|
|
{
|
|
"epoch": 5.624293016795773,
|
|
"grad_norm": 1.5338062047958374,
|
|
"learning_rate": 4.783151400501319e-05,
|
|
"loss": 0.0196,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24550
|
|
},
|
|
{
|
|
"epoch": 5.635747934535145,
|
|
"grad_norm": 1.1229875087738037,
|
|
"learning_rate": 4.763179681380016e-05,
|
|
"loss": 0.0188,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24600
|
|
},
|
|
{
|
|
"epoch": 5.647202852274517,
|
|
"grad_norm": 1.63667893409729,
|
|
"learning_rate": 4.7432117484680434e-05,
|
|
"loss": 0.02,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24650
|
|
},
|
|
{
|
|
"epoch": 5.658657770013889,
|
|
"grad_norm": 0.6071897745132446,
|
|
"learning_rate": 4.723247921006483e-05,
|
|
"loss": 0.0202,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24700
|
|
},
|
|
{
|
|
"epoch": 5.670112687753261,
|
|
"grad_norm": 1.3240978717803955,
|
|
"learning_rate": 4.703288518170774e-05,
|
|
"loss": 0.019,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24750
|
|
},
|
|
{
|
|
"epoch": 5.681567605492633,
|
|
"grad_norm": 0.9332528114318848,
|
|
"learning_rate": 4.683333859065621e-05,
|
|
"loss": 0.0189,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24800
|
|
},
|
|
{
|
|
"epoch": 5.693022523232005,
|
|
"grad_norm": 1.2496166229248047,
|
|
"learning_rate": 4.663384262719881e-05,
|
|
"loss": 0.0183,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24850
|
|
},
|
|
{
|
|
"epoch": 5.704477440971377,
|
|
"grad_norm": 1.2651309967041016,
|
|
"learning_rate": 4.643440048081478e-05,
|
|
"loss": 0.0201,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24900
|
|
},
|
|
{
|
|
"epoch": 5.715932358710749,
|
|
"grad_norm": 1.1634583473205566,
|
|
"learning_rate": 4.623501534012287e-05,
|
|
"loss": 0.02,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 24950
|
|
},
|
|
{
|
|
"epoch": 5.727387276450121,
|
|
"grad_norm": 1.3410481214523315,
|
|
"learning_rate": 4.60356903928305e-05,
|
|
"loss": 0.0187,
|
|
"memory/device_mem_reserved(gib)": 53.25,
|
|
"memory/max_mem_active(gib)": 48.21,
|
|
"memory/max_mem_allocated(gib)": 48.21,
|
|
"step": 25000
|
|
}
|
|
],
|
|
"logging_steps": 50,
|
|
"max_steps": 43649,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 10,
|
|
"save_steps": 1000,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 3.430940996497441e+18,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|