4115 lines
96 KiB
JSON
4115 lines
96 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 500.0,
|
|
"eval_steps": 500,
|
|
"global_step": 5000,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 9.25,
|
|
"learning_rate": 1.8e-05,
|
|
"loss": 2.605172348022461,
|
|
"loss_d0": 2.6139824271202086,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 4.8125,
|
|
"learning_rate": 3.8e-05,
|
|
"loss": 1.1845547676086425,
|
|
"loss_d0": 1.1885395765304565,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 3.65625,
|
|
"learning_rate": 5.8e-05,
|
|
"loss": 0.8508452415466309,
|
|
"loss_d0": 0.8536352932453155,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 3.46875,
|
|
"learning_rate": 7.800000000000001e-05,
|
|
"loss": 0.6966594219207763,
|
|
"loss_d0": 0.6979476511478424,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 3.296875,
|
|
"learning_rate": 9.8e-05,
|
|
"loss": 0.5743978500366211,
|
|
"loss_d0": 0.5776701003313065,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 3.71875,
|
|
"learning_rate": 0.000118,
|
|
"loss": 0.4979101657867432,
|
|
"loss_d0": 0.4996922880411148,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 3.25,
|
|
"learning_rate": 0.000138,
|
|
"loss": 0.4397528648376465,
|
|
"loss_d0": 0.4406041353940964,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"grad_norm": 2.5625,
|
|
"learning_rate": 0.00015800000000000002,
|
|
"loss": 0.3697507381439209,
|
|
"loss_d0": 0.3696742236614227,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 9.0,
|
|
"grad_norm": 2.9375,
|
|
"learning_rate": 0.00017800000000000002,
|
|
"loss": 0.31113204956054685,
|
|
"loss_d0": 0.31142298579216005,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 10.0,
|
|
"grad_norm": 2.25,
|
|
"learning_rate": 0.00019800000000000002,
|
|
"loss": 0.2800392389297485,
|
|
"loss_d0": 0.27933542132377626,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 11.0,
|
|
"grad_norm": 1.8203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.2221465826034546,
|
|
"loss_d0": 0.22203450053930282,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 12.0,
|
|
"grad_norm": 2.0625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.1953430414199829,
|
|
"loss_d0": 0.19439931064844132,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 13.0,
|
|
"grad_norm": 1.2421875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.14305418729782104,
|
|
"loss_d0": 0.14241147190332412,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 14.0,
|
|
"grad_norm": 1.609375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.1502935767173767,
|
|
"loss_d0": 0.14951273798942566,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 15.0,
|
|
"grad_norm": 2.15625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.14393000602722167,
|
|
"loss_d0": 0.14211773499846458,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 16.0,
|
|
"grad_norm": 1.5390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.1161999225616455,
|
|
"loss_d0": 0.11462540775537491,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 17.0,
|
|
"grad_norm": 0.7734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.1050883412361145,
|
|
"loss_d0": 0.10514769107103347,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 18.0,
|
|
"grad_norm": 1.2421875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.10532078742980958,
|
|
"loss_d0": 0.10545785427093506,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 19.0,
|
|
"grad_norm": 1.375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.11194120645523072,
|
|
"loss_d0": 0.11231792494654655,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 20.0,
|
|
"grad_norm": 1.0078125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.10956100225448609,
|
|
"loss_d0": 0.11055244281888008,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 21.0,
|
|
"grad_norm": 1.109375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.09398337006568909,
|
|
"loss_d0": 0.09433126747608185,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 22.0,
|
|
"grad_norm": 0.66796875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.08510669469833373,
|
|
"loss_d0": 0.08516838252544404,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 23.0,
|
|
"grad_norm": 1.796875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.07973664999008179,
|
|
"loss_d0": 0.0800891250371933,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 24.0,
|
|
"grad_norm": 0.76953125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.07982662916183472,
|
|
"loss_d0": 0.08119344227015972,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 25.0,
|
|
"grad_norm": 0.578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.08493419289588929,
|
|
"loss_d0": 0.08543153777718544,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 26.0,
|
|
"grad_norm": 0.6640625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.06563451290130615,
|
|
"loss_d0": 0.06554836891591549,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 27.0,
|
|
"grad_norm": 0.99609375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.08964254260063172,
|
|
"loss_d0": 0.08905367143452167,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 28.0,
|
|
"grad_norm": 1.4375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.08700705170631409,
|
|
"loss_d0": 0.08580705337226391,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 29.0,
|
|
"grad_norm": 0.97265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.08369559049606323,
|
|
"loss_d0": 0.08155160546302795,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 30.0,
|
|
"grad_norm": 0.58203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.07587954998016358,
|
|
"loss_d0": 0.0753675114363432,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 31.0,
|
|
"grad_norm": 0.9921875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.06574047803878784,
|
|
"loss_d0": 0.06600831300020218,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 32.0,
|
|
"grad_norm": 0.68359375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.06255401968955994,
|
|
"loss_d0": 0.06289612613618374,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 33.0,
|
|
"grad_norm": 0.5859375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.06944599151611328,
|
|
"loss_d0": 0.07013467662036418,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 34.0,
|
|
"grad_norm": 0.392578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0734113335609436,
|
|
"loss_d0": 0.07388503737747669,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 35.0,
|
|
"grad_norm": 1.625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.06372126340866088,
|
|
"loss_d0": 0.06445319131016732,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 36.0,
|
|
"grad_norm": 0.859375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.06621803045272827,
|
|
"loss_d0": 0.0674049399793148,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 37.0,
|
|
"grad_norm": 0.494140625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.07585157752037049,
|
|
"loss_d0": 0.07674749866127968,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 38.0,
|
|
"grad_norm": 0.8671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.07490594983100891,
|
|
"loss_d0": 0.07461650408804417,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 39.0,
|
|
"grad_norm": 0.474609375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.06459608674049377,
|
|
"loss_d0": 0.06494694538414478,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 40.0,
|
|
"grad_norm": 0.875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0653274655342102,
|
|
"loss_d0": 0.06574108265340328,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 41.0,
|
|
"grad_norm": 1.8046875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.08319691419601441,
|
|
"loss_d0": 0.0816740058362484,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 42.0,
|
|
"grad_norm": 0.427734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.058042091131210324,
|
|
"loss_d0": 0.058284175023436545,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 43.0,
|
|
"grad_norm": 0.48828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05881038308143616,
|
|
"loss_d0": 0.05877893678843975,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 44.0,
|
|
"grad_norm": 0.376953125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05556913018226624,
|
|
"loss_d0": 0.05579867213964462,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 45.0,
|
|
"grad_norm": 0.5,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0559271514415741,
|
|
"loss_d0": 0.0562079343944788,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 46.0,
|
|
"grad_norm": 0.47265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0503437340259552,
|
|
"loss_d0": 0.050457949936389926,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 47.0,
|
|
"grad_norm": 0.4296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04916974902153015,
|
|
"loss_d0": 0.0493311133235693,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 48.0,
|
|
"grad_norm": 0.8359375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.060026037693023684,
|
|
"loss_d0": 0.059200653806328773,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 49.0,
|
|
"grad_norm": 0.40625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.06555094122886658,
|
|
"loss_d0": 0.06555219888687133,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 50.0,
|
|
"grad_norm": 0.78125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.07094892263412475,
|
|
"loss_d0": 0.07080870307981968,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 50.0,
|
|
"eval_loss": 9.149801254272461,
|
|
"eval_runtime": 0.6889,
|
|
"eval_samples_per_second": 725.825,
|
|
"eval_steps_per_second": 72.582,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 51.0,
|
|
"grad_norm": 0.68359375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.07003722190856934,
|
|
"loss_d0": 0.07078699246048928,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 52.0,
|
|
"grad_norm": 0.65234375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0648545503616333,
|
|
"loss_d0": 0.06463338956236839,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 53.0,
|
|
"grad_norm": 0.54296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05929445028305054,
|
|
"loss_d0": 0.0596495222300291,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 54.0,
|
|
"grad_norm": 0.5078125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05196449756622314,
|
|
"loss_d0": 0.05247226879000664,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 55.0,
|
|
"grad_norm": 0.7734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05879771709442139,
|
|
"loss_d0": 0.05922210738062859,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 56.0,
|
|
"grad_norm": 0.5078125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.06885148882865906,
|
|
"loss_d0": 0.07016028575599194,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 57.0,
|
|
"grad_norm": 0.83203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.057416903972625735,
|
|
"loss_d0": 0.05888371020555496,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 58.0,
|
|
"grad_norm": 0.6171875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.057390010356903075,
|
|
"loss_d0": 0.05848095864057541,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 59.0,
|
|
"grad_norm": 0.3984375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.049796289205551146,
|
|
"loss_d0": 0.05020042285323143,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 60.0,
|
|
"grad_norm": 0.3828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05940237045288086,
|
|
"loss_d0": 0.06075261794030666,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 61.0,
|
|
"grad_norm": 0.35546875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.057738131284713744,
|
|
"loss_d0": 0.05881649628281593,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 62.0,
|
|
"grad_norm": 0.470703125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.062183260917663574,
|
|
"loss_d0": 0.06322281733155251,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 63.0,
|
|
"grad_norm": 0.78515625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05927368402481079,
|
|
"loss_d0": 0.0597139336168766,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 64.0,
|
|
"grad_norm": 0.58203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.058104443550109866,
|
|
"loss_d0": 0.05876607708632946,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 65.0,
|
|
"grad_norm": 0.3828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05966512560844421,
|
|
"loss_d0": 0.060669278353452684,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 66.0,
|
|
"grad_norm": 0.498046875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05417026281356811,
|
|
"loss_d0": 0.054636499658226964,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 67.0,
|
|
"grad_norm": 0.392578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05017418265342712,
|
|
"loss_d0": 0.050396521016955374,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 68.0,
|
|
"grad_norm": 0.240234375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04814895987510681,
|
|
"loss_d0": 0.04857309609651565,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 69.0,
|
|
"grad_norm": 0.396484375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05414179563522339,
|
|
"loss_d0": 0.05384636260569096,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 70.0,
|
|
"grad_norm": 0.5703125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05307164788246155,
|
|
"loss_d0": 0.05355789102613926,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 71.0,
|
|
"grad_norm": 0.33984375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05436263084411621,
|
|
"loss_d0": 0.05512550659477711,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 72.0,
|
|
"grad_norm": 0.5703125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05032788515090943,
|
|
"loss_d0": 0.050449307262897494,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 73.0,
|
|
"grad_norm": 0.400390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.051280814409255984,
|
|
"loss_d0": 0.05181795097887516,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 74.0,
|
|
"grad_norm": 0.3828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05536478161811829,
|
|
"loss_d0": 0.05605713278055191,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 75.0,
|
|
"grad_norm": 0.5078125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05402403473854065,
|
|
"loss_d0": 0.054415644705295564,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 76.0,
|
|
"grad_norm": 0.54296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.057246971130371097,
|
|
"loss_d0": 0.057024940848350525,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 77.0,
|
|
"grad_norm": 0.353515625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.053191614151000974,
|
|
"loss_d0": 0.053360605239868165,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 78.0,
|
|
"grad_norm": 0.5546875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05366742014884949,
|
|
"loss_d0": 0.05328587256371975,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 79.0,
|
|
"grad_norm": 0.5,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.06317275166511535,
|
|
"loss_d0": 0.06282185427844525,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 80.0,
|
|
"grad_norm": 0.5703125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.07268043756484985,
|
|
"loss_d0": 0.07160068228840828,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 81.0,
|
|
"grad_norm": 0.47265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05127843022346497,
|
|
"loss_d0": 0.051354449987411496,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 82.0,
|
|
"grad_norm": 0.44140625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.045409074425697325,
|
|
"loss_d0": 0.045082954317331315,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 83.0,
|
|
"grad_norm": 0.4296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04334630072116852,
|
|
"loss_d0": 0.04350667372345925,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 84.0,
|
|
"grad_norm": 0.2392578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.042599648237228394,
|
|
"loss_d0": 0.042730527743697164,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 85.0,
|
|
"grad_norm": 0.484375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04336960911750794,
|
|
"loss_d0": 0.043284989148378375,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 86.0,
|
|
"grad_norm": 0.484375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04642752707004547,
|
|
"loss_d0": 0.04698342382907868,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 87.0,
|
|
"grad_norm": 0.3125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04236462116241455,
|
|
"loss_d0": 0.04291092492640018,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 88.0,
|
|
"grad_norm": 0.44140625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0467838853597641,
|
|
"loss_d0": 0.04719291441142559,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 89.0,
|
|
"grad_norm": 0.431640625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.056033474206924436,
|
|
"loss_d0": 0.0564144778996706,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 90.0,
|
|
"grad_norm": 0.359375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05397940874099731,
|
|
"loss_d0": 0.05476293601095676,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 91.0,
|
|
"grad_norm": 0.44140625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04703973531723023,
|
|
"loss_d0": 0.04744415730237961,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 92.0,
|
|
"grad_norm": 0.27734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05714722275733948,
|
|
"loss_d0": 0.05776938088238239,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 93.0,
|
|
"grad_norm": 0.423828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.049974143505096436,
|
|
"loss_d0": 0.05034521222114563,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 94.0,
|
|
"grad_norm": 0.38671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05092711448669433,
|
|
"loss_d0": 0.0512014877051115,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 95.0,
|
|
"grad_norm": 0.400390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04760122001171112,
|
|
"loss_d0": 0.048098673298954966,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 96.0,
|
|
"grad_norm": 0.333984375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0453918844461441,
|
|
"loss_d0": 0.04554104544222355,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 97.0,
|
|
"grad_norm": 0.302734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.045323750376701354,
|
|
"loss_d0": 0.04565862752497196,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 98.0,
|
|
"grad_norm": 0.466796875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04769502282142639,
|
|
"loss_d0": 0.047993503510951996,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 99.0,
|
|
"grad_norm": 0.384765625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04582420587539673,
|
|
"loss_d0": 0.045967242866754535,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 100.0,
|
|
"grad_norm": 0.298828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04648930430412292,
|
|
"loss_d0": 0.04681434221565724,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 100.0,
|
|
"eval_loss": 8.996453285217285,
|
|
"eval_runtime": 0.6897,
|
|
"eval_samples_per_second": 724.945,
|
|
"eval_steps_per_second": 72.494,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 101.0,
|
|
"grad_norm": 0.50390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.047931820154190063,
|
|
"loss_d0": 0.048335249349474904,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 102.0,
|
|
"grad_norm": 0.328125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04393635094165802,
|
|
"loss_d0": 0.04436287619173527,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 103.0,
|
|
"grad_norm": 0.380859375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.052803754806518555,
|
|
"loss_d0": 0.05395218767225742,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 104.0,
|
|
"grad_norm": 0.427734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.046474286913871767,
|
|
"loss_d0": 0.046956886723637584,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 105.0,
|
|
"grad_norm": 0.359375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04853596985340118,
|
|
"loss_d0": 0.0489469937980175,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 106.0,
|
|
"grad_norm": 0.62890625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.050303131341934204,
|
|
"loss_d0": 0.050602763146162036,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 107.0,
|
|
"grad_norm": 0.515625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.062167507410049436,
|
|
"loss_d0": 0.06302939765155316,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 108.0,
|
|
"grad_norm": 0.375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05572482943534851,
|
|
"loss_d0": 0.0563100803643465,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 109.0,
|
|
"grad_norm": 0.328125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04392791986465454,
|
|
"loss_d0": 0.04417993500828743,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 110.0,
|
|
"grad_norm": 0.3515625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.048283118009567264,
|
|
"loss_d0": 0.048720812797546385,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 111.0,
|
|
"grad_norm": 0.298828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.042201068997383115,
|
|
"loss_d0": 0.04266498349606991,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 112.0,
|
|
"grad_norm": 0.228515625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04001366794109344,
|
|
"loss_d0": 0.040179040282964706,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 113.0,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04639661908149719,
|
|
"loss_d0": 0.04656643345952034,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 114.0,
|
|
"grad_norm": 0.48828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04307742714881897,
|
|
"loss_d0": 0.04315165765583515,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 115.0,
|
|
"grad_norm": 0.5390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.047644132375717164,
|
|
"loss_d0": 0.04783033281564712,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 116.0,
|
|
"grad_norm": 0.58984375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05251736044883728,
|
|
"loss_d0": 0.05270914658904076,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 117.0,
|
|
"grad_norm": 0.640625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0490668922662735,
|
|
"loss_d0": 0.049511789530515674,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 118.0,
|
|
"grad_norm": 0.44921875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0441941112279892,
|
|
"loss_d0": 0.04463861547410488,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 119.0,
|
|
"grad_norm": 0.30859375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04082232713699341,
|
|
"loss_d0": 0.0409642331302166,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 120.0,
|
|
"grad_norm": 0.2734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04445215463638306,
|
|
"loss_d0": 0.044935400038957594,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 121.0,
|
|
"grad_norm": 0.484375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04362497329711914,
|
|
"loss_d0": 0.04389031082391739,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 122.0,
|
|
"grad_norm": 0.58984375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05719525814056396,
|
|
"loss_d0": 0.05802029110491276,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 123.0,
|
|
"grad_norm": 0.953125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.058700555562973024,
|
|
"loss_d0": 0.059645514190196994,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 124.0,
|
|
"grad_norm": 0.78515625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.049953502416610715,
|
|
"loss_d0": 0.05056100562214851,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 125.0,
|
|
"grad_norm": 0.494140625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04485623240470886,
|
|
"loss_d0": 0.045016249269247056,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 126.0,
|
|
"grad_norm": 0.4140625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04192852973937988,
|
|
"loss_d0": 0.0420475821942091,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 127.0,
|
|
"grad_norm": 0.455078125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0426062673330307,
|
|
"loss_d0": 0.042881960049271584,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 128.0,
|
|
"grad_norm": 0.23828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04536706209182739,
|
|
"loss_d0": 0.04527593217790127,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 129.0,
|
|
"grad_norm": 0.318359375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05174095630645752,
|
|
"loss_d0": 0.05235871635377407,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 130.0,
|
|
"grad_norm": 0.29296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.042698603868484494,
|
|
"loss_d0": 0.04338001646101475,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 131.0,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04018429815769196,
|
|
"loss_d0": 0.04045051150023937,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 132.0,
|
|
"grad_norm": 0.3515625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.039610669016838074,
|
|
"loss_d0": 0.03991545438766479,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 133.0,
|
|
"grad_norm": 0.298828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04048936069011688,
|
|
"loss_d0": 0.04079539142549038,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 134.0,
|
|
"grad_norm": 0.3203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.042930704355239865,
|
|
"loss_d0": 0.043228012323379514,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 135.0,
|
|
"grad_norm": 0.49609375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.047755110263824466,
|
|
"loss_d0": 0.047895029187202454,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 136.0,
|
|
"grad_norm": 0.388671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.055011457204818724,
|
|
"loss_d0": 0.05567521676421165,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 137.0,
|
|
"grad_norm": 0.5390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05911533832550049,
|
|
"loss_d0": 0.0588922031223774,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 138.0,
|
|
"grad_norm": 1.2265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.07207316756248475,
|
|
"loss_d0": 0.07146050035953522,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 139.0,
|
|
"grad_norm": 0.6875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.051120662689208986,
|
|
"loss_d0": 0.05109778419137001,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 140.0,
|
|
"grad_norm": 0.3125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05012030005455017,
|
|
"loss_d0": 0.0504388976842165,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 141.0,
|
|
"grad_norm": 0.369140625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.042303305864334104,
|
|
"loss_d0": 0.042646681889891624,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 142.0,
|
|
"grad_norm": 0.318359375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04310626089572907,
|
|
"loss_d0": 0.04322606287896633,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 143.0,
|
|
"grad_norm": 0.3125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.041630563139915464,
|
|
"loss_d0": 0.04180505834519863,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 144.0,
|
|
"grad_norm": 0.33203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04187849760055542,
|
|
"loss_d0": 0.042144588008522985,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 145.0,
|
|
"grad_norm": 0.416015625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04202188551425934,
|
|
"loss_d0": 0.04231737479567528,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 146.0,
|
|
"grad_norm": 0.435546875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0439466267824173,
|
|
"loss_d0": 0.044225719198584555,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 147.0,
|
|
"grad_norm": 0.431640625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04756100177764892,
|
|
"loss_d0": 0.04815598018467426,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 148.0,
|
|
"grad_norm": 0.37890625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04747256338596344,
|
|
"loss_d0": 0.04799098074436188,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 149.0,
|
|
"grad_norm": 0.431640625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.049269622564315795,
|
|
"loss_d0": 0.04934872798621655,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 150.0,
|
|
"grad_norm": 0.5,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04529989957809448,
|
|
"loss_d0": 0.045717564225196836,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 150.0,
|
|
"eval_loss": 9.852365493774414,
|
|
"eval_runtime": 0.6886,
|
|
"eval_samples_per_second": 726.092,
|
|
"eval_steps_per_second": 72.609,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 151.0,
|
|
"grad_norm": 0.298828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.043574199080467224,
|
|
"loss_d0": 0.04385456591844559,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 152.0,
|
|
"grad_norm": 0.390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04235072135925293,
|
|
"loss_d0": 0.04293657392263413,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 153.0,
|
|
"grad_norm": 0.4140625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04195387065410614,
|
|
"loss_d0": 0.042300010845065114,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 154.0,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04051635265350342,
|
|
"loss_d0": 0.040704548731446265,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 155.0,
|
|
"grad_norm": 0.251953125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04044683873653412,
|
|
"loss_d0": 0.04077310748398304,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 156.0,
|
|
"grad_norm": 0.359375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04552633166313171,
|
|
"loss_d0": 0.0455584455281496,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 157.0,
|
|
"grad_norm": 0.359375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0467070460319519,
|
|
"loss_d0": 0.04717182517051697,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 158.0,
|
|
"grad_norm": 0.474609375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04355217814445496,
|
|
"loss_d0": 0.04367000050842762,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 159.0,
|
|
"grad_norm": 0.345703125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04726326167583465,
|
|
"loss_d0": 0.047322430461645124,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 160.0,
|
|
"grad_norm": 0.3984375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05183404088020325,
|
|
"loss_d0": 0.051499960198998454,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 161.0,
|
|
"grad_norm": 0.62109375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.052065759897232056,
|
|
"loss_d0": 0.05186050981283188,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 162.0,
|
|
"grad_norm": 0.41796875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04411465525627136,
|
|
"loss_d0": 0.04437471702694893,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 163.0,
|
|
"grad_norm": 0.296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04183531403541565,
|
|
"loss_d0": 0.04211582764983177,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 164.0,
|
|
"grad_norm": 0.35546875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04270436465740204,
|
|
"loss_d0": 0.04318705834448337,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 165.0,
|
|
"grad_norm": 0.33984375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04538227915763855,
|
|
"loss_d0": 0.04579868800938129,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 166.0,
|
|
"grad_norm": 0.451171875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04416545033454895,
|
|
"loss_d0": 0.04440648853778839,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 167.0,
|
|
"grad_norm": 0.41796875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0445246160030365,
|
|
"loss_d0": 0.04493884444236755,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 168.0,
|
|
"grad_norm": 0.287109375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04588344693183899,
|
|
"loss_d0": 0.046438657119870184,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 169.0,
|
|
"grad_norm": 0.275390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04363830387592316,
|
|
"loss_d0": 0.04401036873459816,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 170.0,
|
|
"grad_norm": 0.3125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.041232901811599734,
|
|
"loss_d0": 0.04151614680886269,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 171.0,
|
|
"grad_norm": 0.2412109375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03885977864265442,
|
|
"loss_d0": 0.039185041561722755,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 172.0,
|
|
"grad_norm": 0.314453125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0402399480342865,
|
|
"loss_d0": 0.040488839522004126,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 173.0,
|
|
"grad_norm": 0.326171875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04318315982818603,
|
|
"loss_d0": 0.04342842325568199,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 174.0,
|
|
"grad_norm": 0.263671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.040678870677947995,
|
|
"loss_d0": 0.04102524146437645,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 175.0,
|
|
"grad_norm": 0.341796875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.040768089890480044,
|
|
"loss_d0": 0.04122583419084549,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 176.0,
|
|
"grad_norm": 0.33984375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.042663860321044925,
|
|
"loss_d0": 0.04297072477638721,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 177.0,
|
|
"grad_norm": 0.412109375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04289932250976562,
|
|
"loss_d0": 0.04324173927307129,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 178.0,
|
|
"grad_norm": 0.392578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04065240621566772,
|
|
"loss_d0": 0.04108826108276844,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 179.0,
|
|
"grad_norm": 0.310546875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04117431342601776,
|
|
"loss_d0": 0.04153142869472504,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 180.0,
|
|
"grad_norm": 0.23828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03892770111560821,
|
|
"loss_d0": 0.03927576504647732,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 181.0,
|
|
"grad_norm": 0.396484375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04309303760528564,
|
|
"loss_d0": 0.04333780445158482,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 182.0,
|
|
"grad_norm": 0.3515625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04459149837493896,
|
|
"loss_d0": 0.04491332247853279,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 183.0,
|
|
"grad_norm": 0.48046875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.046898290514945984,
|
|
"loss_d0": 0.047189544141292575,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 184.0,
|
|
"grad_norm": 0.353515625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04611060917377472,
|
|
"loss_d0": 0.04624026641249657,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 185.0,
|
|
"grad_norm": 0.357421875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04319146275520325,
|
|
"loss_d0": 0.043569745123386384,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 186.0,
|
|
"grad_norm": 0.59375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04202109277248382,
|
|
"loss_d0": 0.04252335019409657,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 187.0,
|
|
"grad_norm": 0.294921875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04427561163902283,
|
|
"loss_d0": 0.04493751563131809,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 188.0,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04414681792259216,
|
|
"loss_d0": 0.044181046262383464,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 189.0,
|
|
"grad_norm": 0.314453125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05471844673156738,
|
|
"loss_d0": 0.05483967214822769,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 190.0,
|
|
"grad_norm": 0.38671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04385135769844055,
|
|
"loss_d0": 0.04406722001731396,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 191.0,
|
|
"grad_norm": 0.66015625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04954864680767059,
|
|
"loss_d0": 0.04996456205844879,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 192.0,
|
|
"grad_norm": 0.42578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04157604873180389,
|
|
"loss_d0": 0.042001275718212126,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 193.0,
|
|
"grad_norm": 0.61328125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03958460390567779,
|
|
"loss_d0": 0.040064525604248044,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 194.0,
|
|
"grad_norm": 0.263671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03781185150146484,
|
|
"loss_d0": 0.03814610652625561,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 195.0,
|
|
"grad_norm": 0.3359375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.038428235054016116,
|
|
"loss_d0": 0.038876712694764136,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 196.0,
|
|
"grad_norm": 0.44140625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.039642858505249026,
|
|
"loss_d0": 0.040048804879188535,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 197.0,
|
|
"grad_norm": 0.302734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.040806761384010314,
|
|
"loss_d0": 0.04136274456977844,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 198.0,
|
|
"grad_norm": 0.328125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.040471208095550534,
|
|
"loss_d0": 0.04059889316558838,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 199.0,
|
|
"grad_norm": 0.263671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.039885866641998294,
|
|
"loss_d0": 0.040052902325987814,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 200.0,
|
|
"grad_norm": 0.4609375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0434266984462738,
|
|
"loss_d0": 0.043684659898281096,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 200.0,
|
|
"eval_loss": 10.189282417297363,
|
|
"eval_runtime": 0.6861,
|
|
"eval_samples_per_second": 728.743,
|
|
"eval_steps_per_second": 72.874,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 201.0,
|
|
"grad_norm": 0.98046875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04809910655021667,
|
|
"loss_d0": 0.04849519394338131,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 202.0,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.041577893495559695,
|
|
"loss_d0": 0.041943120583891866,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 203.0,
|
|
"grad_norm": 0.345703125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04084473252296448,
|
|
"loss_d0": 0.04086658768355846,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 204.0,
|
|
"grad_norm": 0.322265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03976602852344513,
|
|
"loss_d0": 0.03992565609514713,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 205.0,
|
|
"grad_norm": 0.2734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04404784142971039,
|
|
"loss_d0": 0.043900683894753455,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 206.0,
|
|
"grad_norm": 0.29296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04731652736663818,
|
|
"loss_d0": 0.04724227301776409,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 207.0,
|
|
"grad_norm": 0.2421875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04152216017246246,
|
|
"loss_d0": 0.04180925637483597,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 208.0,
|
|
"grad_norm": 0.314453125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04118179380893707,
|
|
"loss_d0": 0.04148880951106548,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 209.0,
|
|
"grad_norm": 0.412109375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04137121140956879,
|
|
"loss_d0": 0.04159573912620544,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 210.0,
|
|
"grad_norm": 0.447265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.044093775749206546,
|
|
"loss_d0": 0.0443379782140255,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 211.0,
|
|
"grad_norm": 0.470703125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04798535108566284,
|
|
"loss_d0": 0.04776673950254917,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 212.0,
|
|
"grad_norm": 0.419921875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05737084746360779,
|
|
"loss_d0": 0.05612550266087055,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 213.0,
|
|
"grad_norm": 0.39453125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.046094492077827454,
|
|
"loss_d0": 0.04582822136580944,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 214.0,
|
|
"grad_norm": 0.271484375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04114535450935364,
|
|
"loss_d0": 0.04123819507658481,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 215.0,
|
|
"grad_norm": 0.302734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.039699801802635194,
|
|
"loss_d0": 0.040003697574138644,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 216.0,
|
|
"grad_norm": 0.291015625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03862698972225189,
|
|
"loss_d0": 0.03893596157431602,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 217.0,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.043020579218864444,
|
|
"loss_d0": 0.0432659212499857,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 218.0,
|
|
"grad_norm": 0.294921875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04398588240146637,
|
|
"loss_d0": 0.04368347264826298,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 219.0,
|
|
"grad_norm": 0.390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.040211325883865355,
|
|
"loss_d0": 0.040444132313132285,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 220.0,
|
|
"grad_norm": 0.26171875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04232992231845856,
|
|
"loss_d0": 0.04273700416088104,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 221.0,
|
|
"grad_norm": 0.267578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03988331258296966,
|
|
"loss_d0": 0.040199489891529085,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 222.0,
|
|
"grad_norm": 0.279296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03946065902709961,
|
|
"loss_d0": 0.03989113420248032,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 223.0,
|
|
"grad_norm": 0.263671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.038988009095191956,
|
|
"loss_d0": 0.03922968059778213,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 224.0,
|
|
"grad_norm": 0.5234375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.039998382329940796,
|
|
"loss_d0": 0.040379610285162924,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 225.0,
|
|
"grad_norm": 0.337890625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.041840368509292604,
|
|
"loss_d0": 0.042378640919923785,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 226.0,
|
|
"grad_norm": 0.29296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04227463901042938,
|
|
"loss_d0": 0.04284324869513512,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 227.0,
|
|
"grad_norm": 0.263671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04585360586643219,
|
|
"loss_d0": 0.045701490342617036,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 228.0,
|
|
"grad_norm": 0.279296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0420985072851181,
|
|
"loss_d0": 0.042375285923480985,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 229.0,
|
|
"grad_norm": 0.279296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04348675310611725,
|
|
"loss_d0": 0.043607931956648825,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 230.0,
|
|
"grad_norm": 0.447265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.043425142765045166,
|
|
"loss_d0": 0.04386321604251862,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 231.0,
|
|
"grad_norm": 0.34375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.05011132955551147,
|
|
"loss_d0": 0.05058671832084656,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 232.0,
|
|
"grad_norm": 0.337890625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04243658483028412,
|
|
"loss_d0": 0.04289327785372734,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 233.0,
|
|
"grad_norm": 0.302734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.042480701208114625,
|
|
"loss_d0": 0.04278766848146916,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 234.0,
|
|
"grad_norm": 0.38671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03948282897472381,
|
|
"loss_d0": 0.03976398035883903,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 235.0,
|
|
"grad_norm": 0.3203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03911280632019043,
|
|
"loss_d0": 0.03939221054315567,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 236.0,
|
|
"grad_norm": 0.240234375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04534493386745453,
|
|
"loss_d0": 0.0454285766929388,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 237.0,
|
|
"grad_norm": 0.322265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04498372673988342,
|
|
"loss_d0": 0.04524303488433361,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 238.0,
|
|
"grad_norm": 0.306640625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04036388099193573,
|
|
"loss_d0": 0.04051109738647938,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 239.0,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03902736306190491,
|
|
"loss_d0": 0.039412683621048925,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 240.0,
|
|
"grad_norm": 0.27734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03764301538467407,
|
|
"loss_d0": 0.037954670190811154,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 241.0,
|
|
"grad_norm": 0.283203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03973473310470581,
|
|
"loss_d0": 0.03991707712411881,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 242.0,
|
|
"grad_norm": 0.236328125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03933502435684204,
|
|
"loss_d0": 0.03948218524456024,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 243.0,
|
|
"grad_norm": 0.408203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04040732085704803,
|
|
"loss_d0": 0.040525125712156294,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 244.0,
|
|
"grad_norm": 0.40625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04274722635746002,
|
|
"loss_d0": 0.04287994578480721,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 245.0,
|
|
"grad_norm": 0.23828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04024344384670257,
|
|
"loss_d0": 0.04048874229192734,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 246.0,
|
|
"grad_norm": 0.30859375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.040749162435531616,
|
|
"loss_d0": 0.04102331958711147,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 247.0,
|
|
"grad_norm": 0.345703125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0426899254322052,
|
|
"loss_d0": 0.04291442297399044,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 248.0,
|
|
"grad_norm": 0.396484375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.039026832580566405,
|
|
"loss_d0": 0.03924530446529388,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 249.0,
|
|
"grad_norm": 0.296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0383389413356781,
|
|
"loss_d0": 0.03859546259045601,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 250.0,
|
|
"grad_norm": 0.369140625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.038316363096237184,
|
|
"loss_d0": 0.03865836299955845,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 250.0,
|
|
"eval_loss": 10.885120391845703,
|
|
"eval_runtime": 0.6922,
|
|
"eval_samples_per_second": 722.309,
|
|
"eval_steps_per_second": 72.231,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 251.0,
|
|
"grad_norm": 0.2294921875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03913344144821167,
|
|
"loss_d0": 0.039387579634785654,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 252.0,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03892936110496521,
|
|
"loss_d0": 0.03930997662246227,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 253.0,
|
|
"grad_norm": 0.3046875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037770673632621765,
|
|
"loss_d0": 0.038118017837405205,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 254.0,
|
|
"grad_norm": 0.2734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037772169709205626,
|
|
"loss_d0": 0.03807541318237782,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 255.0,
|
|
"grad_norm": 0.7890625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03999180793762207,
|
|
"loss_d0": 0.04036150127649307,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 256.0,
|
|
"grad_norm": 0.4765625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.052530336380004886,
|
|
"loss_d0": 0.05343040004372597,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 257.0,
|
|
"grad_norm": 0.40625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04317347705364227,
|
|
"loss_d0": 0.04373372159898281,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 258.0,
|
|
"grad_norm": 0.298828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03994796574115753,
|
|
"loss_d0": 0.04031400717794895,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 259.0,
|
|
"grad_norm": 0.267578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.038785922527313235,
|
|
"loss_d0": 0.039084702357649805,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 260.0,
|
|
"grad_norm": 0.373046875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.039844360947608945,
|
|
"loss_d0": 0.040255676582455636,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 261.0,
|
|
"grad_norm": 0.337890625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04429091215133667,
|
|
"loss_d0": 0.04446314424276352,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 262.0,
|
|
"grad_norm": 0.4453125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04119807183742523,
|
|
"loss_d0": 0.0414251770824194,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 263.0,
|
|
"grad_norm": 0.27734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03950552344322204,
|
|
"loss_d0": 0.039706287905573845,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 264.0,
|
|
"grad_norm": 0.33203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0390357106924057,
|
|
"loss_d0": 0.039455119892954825,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 265.0,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03879677653312683,
|
|
"loss_d0": 0.0390281654894352,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 266.0,
|
|
"grad_norm": 0.51953125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.039793723821640016,
|
|
"loss_d0": 0.04023738354444504,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 267.0,
|
|
"grad_norm": 0.2412109375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037570255994796756,
|
|
"loss_d0": 0.037908059731125834,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 268.0,
|
|
"grad_norm": 0.23046875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03747315108776093,
|
|
"loss_d0": 0.03773516528308392,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 269.0,
|
|
"grad_norm": 0.322265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03748213052749634,
|
|
"loss_d0": 0.03784133456647396,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 270.0,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0373074471950531,
|
|
"loss_d0": 0.03761020861566067,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 271.0,
|
|
"grad_norm": 0.26171875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03723294138908386,
|
|
"loss_d0": 0.03757789246737957,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 272.0,
|
|
"grad_norm": 0.25,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0372806191444397,
|
|
"loss_d0": 0.03759502917528153,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 273.0,
|
|
"grad_norm": 0.298828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037115806341171266,
|
|
"loss_d0": 0.037448635697364806,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 274.0,
|
|
"grad_norm": 0.3203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0369686633348465,
|
|
"loss_d0": 0.037282370403409,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 275.0,
|
|
"grad_norm": 0.283203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03698193728923797,
|
|
"loss_d0": 0.03725597597658634,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 276.0,
|
|
"grad_norm": 0.306640625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037520098686218264,
|
|
"loss_d0": 0.03782733231782913,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 277.0,
|
|
"grad_norm": 0.3203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03804133534431457,
|
|
"loss_d0": 0.038362907245755196,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 278.0,
|
|
"grad_norm": 0.330078125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037969928979873654,
|
|
"loss_d0": 0.03825241588056087,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 279.0,
|
|
"grad_norm": 0.26953125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037792128324508664,
|
|
"loss_d0": 0.03807285577058792,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 280.0,
|
|
"grad_norm": 0.38671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03775832355022431,
|
|
"loss_d0": 0.03801813460886479,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 281.0,
|
|
"grad_norm": 0.26171875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03686901330947876,
|
|
"loss_d0": 0.037187918275594714,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 282.0,
|
|
"grad_norm": 0.271484375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03696819245815277,
|
|
"loss_d0": 0.037317240983247756,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 283.0,
|
|
"grad_norm": 0.30078125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037478744983673096,
|
|
"loss_d0": 0.03774147853255272,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 284.0,
|
|
"grad_norm": 0.3046875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.040481334924697875,
|
|
"loss_d0": 0.04085813723504543,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 285.0,
|
|
"grad_norm": 0.26171875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04263193607330322,
|
|
"loss_d0": 0.04322305843234062,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 286.0,
|
|
"grad_norm": 0.400390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04221307337284088,
|
|
"loss_d0": 0.04266056790947914,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 287.0,
|
|
"grad_norm": 0.26171875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04012386798858643,
|
|
"loss_d0": 0.04027114436030388,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 288.0,
|
|
"grad_norm": 0.6328125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04546632468700409,
|
|
"loss_d0": 0.045037579536437986,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 289.0,
|
|
"grad_norm": 0.333984375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04220171272754669,
|
|
"loss_d0": 0.04206137731671333,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 290.0,
|
|
"grad_norm": 0.349609375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03849797248840332,
|
|
"loss_d0": 0.03877202942967415,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 291.0,
|
|
"grad_norm": 0.275390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03816980719566345,
|
|
"loss_d0": 0.038458903506398204,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 292.0,
|
|
"grad_norm": 0.349609375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.039384329319000246,
|
|
"loss_d0": 0.03953510671854019,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 293.0,
|
|
"grad_norm": 0.267578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03914114236831665,
|
|
"loss_d0": 0.03927744776010513,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 294.0,
|
|
"grad_norm": 0.462890625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037627822160720824,
|
|
"loss_d0": 0.03790898621082306,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 295.0,
|
|
"grad_norm": 0.25,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037601858377456665,
|
|
"loss_d0": 0.03786470964550972,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 296.0,
|
|
"grad_norm": 0.275390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03714042901992798,
|
|
"loss_d0": 0.03739793673157692,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 297.0,
|
|
"grad_norm": 0.27734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037202891707420346,
|
|
"loss_d0": 0.03755674138665199,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 298.0,
|
|
"grad_norm": 0.390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03793781399726868,
|
|
"loss_d0": 0.03822383023798466,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 299.0,
|
|
"grad_norm": 0.298828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.038024306297302246,
|
|
"loss_d0": 0.03834304548799992,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 300.0,
|
|
"grad_norm": 0.30859375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03749783039093017,
|
|
"loss_d0": 0.03785845525562763,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 300.0,
|
|
"eval_loss": 11.146424293518066,
|
|
"eval_runtime": 0.6864,
|
|
"eval_samples_per_second": 728.387,
|
|
"eval_steps_per_second": 72.839,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 301.0,
|
|
"grad_norm": 0.33984375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.038469833135604856,
|
|
"loss_d0": 0.03886338211596012,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 302.0,
|
|
"grad_norm": 0.271484375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04090344309806824,
|
|
"loss_d0": 0.04116071537137032,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 303.0,
|
|
"grad_norm": 0.36328125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.040218299627304076,
|
|
"loss_d0": 0.04055294916033745,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 304.0,
|
|
"grad_norm": 0.462890625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04446632564067841,
|
|
"loss_d0": 0.04473265036940575,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 305.0,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04086683392524719,
|
|
"loss_d0": 0.04126431494951248,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 306.0,
|
|
"grad_norm": 0.34765625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.048713570833206175,
|
|
"loss_d0": 0.04855058118700981,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 307.0,
|
|
"grad_norm": 0.33203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.046805566549301146,
|
|
"loss_d0": 0.04710230566561222,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 308.0,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03934524655342102,
|
|
"loss_d0": 0.039762004464864734,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 309.0,
|
|
"grad_norm": 0.29296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03915072977542877,
|
|
"loss_d0": 0.03944177031517029,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 310.0,
|
|
"grad_norm": 0.3671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04065064489841461,
|
|
"loss_d0": 0.04100923091173172,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 311.0,
|
|
"grad_norm": 0.29296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037496811151504515,
|
|
"loss_d0": 0.03782621137797833,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 312.0,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037671661376953124,
|
|
"loss_d0": 0.038110511004924776,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 313.0,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0369190514087677,
|
|
"loss_d0": 0.03722741194069386,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 314.0,
|
|
"grad_norm": 0.326171875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.036951732635498044,
|
|
"loss_d0": 0.03724584951996803,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 315.0,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03681913614273071,
|
|
"loss_d0": 0.03710218816995621,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 316.0,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.036740392446517944,
|
|
"loss_d0": 0.03705124892294407,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 317.0,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03693808317184448,
|
|
"loss_d0": 0.03728438019752502,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 318.0,
|
|
"grad_norm": 0.53125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03715154826641083,
|
|
"loss_d0": 0.03744188435375691,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 319.0,
|
|
"grad_norm": 0.271484375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03702226579189301,
|
|
"loss_d0": 0.037342607975006104,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 320.0,
|
|
"grad_norm": 0.33203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03733651638031006,
|
|
"loss_d0": 0.037650084123015404,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 321.0,
|
|
"grad_norm": 0.283203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03700153231620788,
|
|
"loss_d0": 0.03734440542757511,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 322.0,
|
|
"grad_norm": 0.2392578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03706228733062744,
|
|
"loss_d0": 0.037381384521722794,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 323.0,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.036909821629524234,
|
|
"loss_d0": 0.03724093846976757,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 324.0,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03717797100543976,
|
|
"loss_d0": 0.03748043179512024,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 325.0,
|
|
"grad_norm": 0.25390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03671996295452118,
|
|
"loss_d0": 0.03704087920486927,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 326.0,
|
|
"grad_norm": 0.251953125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03725916743278503,
|
|
"loss_d0": 0.03758593760430813,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 327.0,
|
|
"grad_norm": 0.24609375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037097156047821045,
|
|
"loss_d0": 0.03740981854498386,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 328.0,
|
|
"grad_norm": 0.3515625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037044870853424075,
|
|
"loss_d0": 0.03732852153480053,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 329.0,
|
|
"grad_norm": 0.26953125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037512749433517456,
|
|
"loss_d0": 0.03780471496284008,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 330.0,
|
|
"grad_norm": 0.423828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03871379792690277,
|
|
"loss_d0": 0.03908683769404888,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 331.0,
|
|
"grad_norm": 0.341796875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.038561710715293886,
|
|
"loss_d0": 0.038924089074134825,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 332.0,
|
|
"grad_norm": 0.88671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04439712464809418,
|
|
"loss_d0": 0.04472551830112934,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 333.0,
|
|
"grad_norm": 0.38671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04164994060993195,
|
|
"loss_d0": 0.041940994933247565,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 334.0,
|
|
"grad_norm": 0.412109375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.040698114037513736,
|
|
"loss_d0": 0.04085970595479012,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 335.0,
|
|
"grad_norm": 0.326171875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03932968080043793,
|
|
"loss_d0": 0.03956272974610329,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 336.0,
|
|
"grad_norm": 0.244140625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0391847550868988,
|
|
"loss_d0": 0.03948534913361072,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 337.0,
|
|
"grad_norm": 0.328125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.047348752617836,
|
|
"loss_d0": 0.046868476271629336,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 338.0,
|
|
"grad_norm": 0.3046875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0379530131816864,
|
|
"loss_d0": 0.0382020853459835,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 339.0,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04256679117679596,
|
|
"loss_d0": 0.04314272291958332,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 340.0,
|
|
"grad_norm": 0.423828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.056311219930648804,
|
|
"loss_d0": 0.05685732625424862,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 341.0,
|
|
"grad_norm": 0.337890625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04154669046401978,
|
|
"loss_d0": 0.04171246141195297,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 342.0,
|
|
"grad_norm": 0.287109375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04307686388492584,
|
|
"loss_d0": 0.04326325096189976,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 343.0,
|
|
"grad_norm": 0.27734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04260125458240509,
|
|
"loss_d0": 0.04288202822208405,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 344.0,
|
|
"grad_norm": 0.28125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04342672824859619,
|
|
"loss_d0": 0.043964647501707074,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 345.0,
|
|
"grad_norm": 0.30859375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.041104856133461,
|
|
"loss_d0": 0.04152129665017128,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 346.0,
|
|
"grad_norm": 0.345703125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0384725421667099,
|
|
"loss_d0": 0.03876579888164997,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 347.0,
|
|
"grad_norm": 0.380859375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04155246317386627,
|
|
"loss_d0": 0.04204757548868656,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 348.0,
|
|
"grad_norm": 0.376953125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04175013601779938,
|
|
"loss_d0": 0.04206421263515949,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 349.0,
|
|
"grad_norm": 0.3984375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03953442573547363,
|
|
"loss_d0": 0.03970748074352741,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 350.0,
|
|
"grad_norm": 0.302734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03868723213672638,
|
|
"loss_d0": 0.03908204138278961,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 350.0,
|
|
"eval_loss": 10.020953178405762,
|
|
"eval_runtime": 0.6866,
|
|
"eval_samples_per_second": 728.252,
|
|
"eval_steps_per_second": 72.825,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 351.0,
|
|
"grad_norm": 0.333984375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04133128821849823,
|
|
"loss_d0": 0.041662900149822234,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 352.0,
|
|
"grad_norm": 0.310546875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04269187152385712,
|
|
"loss_d0": 0.04334259107708931,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 353.0,
|
|
"grad_norm": 0.3046875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03852761685848236,
|
|
"loss_d0": 0.03878095783293247,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 354.0,
|
|
"grad_norm": 0.32421875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04052022397518158,
|
|
"loss_d0": 0.04095298685133457,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 355.0,
|
|
"grad_norm": 0.2890625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04115490317344665,
|
|
"loss_d0": 0.041552980244159696,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 356.0,
|
|
"grad_norm": 0.39453125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.042296862602233885,
|
|
"loss_d0": 0.04288714602589607,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 357.0,
|
|
"grad_norm": 0.34765625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.041248321533203125,
|
|
"loss_d0": 0.04149158634245396,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 358.0,
|
|
"grad_norm": 0.30078125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.04039554595947266,
|
|
"loss_d0": 0.04083836451172829,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 359.0,
|
|
"grad_norm": 0.3125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.039502471685409546,
|
|
"loss_d0": 0.04000399447977543,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 360.0,
|
|
"grad_norm": 0.60546875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.043854904174804685,
|
|
"loss_d0": 0.04427521526813507,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 361.0,
|
|
"grad_norm": 0.33203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.040008130669593814,
|
|
"loss_d0": 0.04016649015247822,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 362.0,
|
|
"grad_norm": 0.396484375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03769044280052185,
|
|
"loss_d0": 0.03798259571194649,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 363.0,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.039430353045463565,
|
|
"loss_d0": 0.039879053831100464,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 364.0,
|
|
"grad_norm": 0.33203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03827457427978516,
|
|
"loss_d0": 0.03869166634976864,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 365.0,
|
|
"grad_norm": 0.298828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037254220247268675,
|
|
"loss_d0": 0.03753783367574215,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 366.0,
|
|
"grad_norm": 0.3203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.036964389681816104,
|
|
"loss_d0": 0.037302806973457336,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 367.0,
|
|
"grad_norm": 0.27734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.036746549606323245,
|
|
"loss_d0": 0.03708359859883785,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 368.0,
|
|
"grad_norm": 0.263671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0370238333940506,
|
|
"loss_d0": 0.037373238056898114,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 369.0,
|
|
"grad_norm": 0.29296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03776443004608154,
|
|
"loss_d0": 0.038052943721413615,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 370.0,
|
|
"grad_norm": 0.40625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037547925114631654,
|
|
"loss_d0": 0.037854228913784024,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 371.0,
|
|
"grad_norm": 0.27734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03720632791519165,
|
|
"loss_d0": 0.03753346242010593,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 372.0,
|
|
"grad_norm": 0.251953125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0377034991979599,
|
|
"loss_d0": 0.037998438253998755,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 373.0,
|
|
"grad_norm": 0.279296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03689497411251068,
|
|
"loss_d0": 0.03722812980413437,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 374.0,
|
|
"grad_norm": 0.349609375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.036770951747894284,
|
|
"loss_d0": 0.03706214055418968,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 375.0,
|
|
"grad_norm": 0.3671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03705790340900421,
|
|
"loss_d0": 0.03742879740893841,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 376.0,
|
|
"grad_norm": 0.3203125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0367428719997406,
|
|
"loss_d0": 0.037059960514307023,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 377.0,
|
|
"grad_norm": 0.3359375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.036790531873703,
|
|
"loss_d0": 0.03712297640740871,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 378.0,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03700532913208008,
|
|
"loss_d0": 0.03728572316467762,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 379.0,
|
|
"grad_norm": 0.341796875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03674401640892029,
|
|
"loss_d0": 0.03708376474678517,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 380.0,
|
|
"grad_norm": 0.3828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037165766954421996,
|
|
"loss_d0": 0.037469035014510155,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 381.0,
|
|
"grad_norm": 0.275390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03820574879646301,
|
|
"loss_d0": 0.038526909053325654,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 382.0,
|
|
"grad_norm": 0.23828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03871320784091949,
|
|
"loss_d0": 0.03914758861064911,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 383.0,
|
|
"grad_norm": 0.240234375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03733502924442291,
|
|
"loss_d0": 0.03770691566169262,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 384.0,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03720555305480957,
|
|
"loss_d0": 0.03752491697669029,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 385.0,
|
|
"grad_norm": 0.275390625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.039854270219802854,
|
|
"loss_d0": 0.04018958024680615,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 386.0,
|
|
"grad_norm": 0.2490234375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03766619563102722,
|
|
"loss_d0": 0.037941229343414304,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 387.0,
|
|
"grad_norm": 0.291015625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03740113973617554,
|
|
"loss_d0": 0.0377108845859766,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 388.0,
|
|
"grad_norm": 0.26953125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0372713029384613,
|
|
"loss_d0": 0.0375568337738514,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 389.0,
|
|
"grad_norm": 0.267578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03675893843173981,
|
|
"loss_d0": 0.037108558043837545,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 390.0,
|
|
"grad_norm": 0.29296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.037296104431152347,
|
|
"loss_d0": 0.0375242929905653,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 391.0,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03685269951820373,
|
|
"loss_d0": 0.0371894758194685,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 392.0,
|
|
"grad_norm": 0.25,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.036673200130462644,
|
|
"loss_d0": 0.036988198012113574,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 393.0,
|
|
"grad_norm": 0.28515625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.036616355180740356,
|
|
"loss_d0": 0.03695385381579399,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 394.0,
|
|
"grad_norm": 0.296875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.036866238713264464,
|
|
"loss_d0": 0.03716698214411736,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 395.0,
|
|
"grad_norm": 0.298828125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03705916702747345,
|
|
"loss_d0": 0.03739931918680668,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 396.0,
|
|
"grad_norm": 0.287109375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0367851734161377,
|
|
"loss_d0": 0.037085448205471036,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 397.0,
|
|
"grad_norm": 0.2734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03684147596359253,
|
|
"loss_d0": 0.03716961406171322,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 398.0,
|
|
"grad_norm": 0.3046875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03675464391708374,
|
|
"loss_d0": 0.03707803189754486,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 399.0,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03699171245098114,
|
|
"loss_d0": 0.03729988299310207,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 400.0,
|
|
"grad_norm": 0.345703125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03692775666713714,
|
|
"loss_d0": 0.037251610308885574,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 400.0,
|
|
"eval_loss": 11.901162147521973,
|
|
"eval_runtime": 0.6874,
|
|
"eval_samples_per_second": 727.37,
|
|
"eval_steps_per_second": 72.737,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 401.0,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03675454556941986,
|
|
"loss_d0": 0.03709004819393158,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 402.0,
|
|
"grad_norm": 0.3359375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.036843341588973996,
|
|
"loss_d0": 0.03714859746396541,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 403.0,
|
|
"grad_norm": 0.302734375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03684849143028259,
|
|
"loss_d0": 0.03714573718607426,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 404.0,
|
|
"grad_norm": 0.271484375,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03681612014770508,
|
|
"loss_d0": 0.037144556641578674,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 405.0,
|
|
"grad_norm": 0.2431640625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03668281435966492,
|
|
"loss_d0": 0.03700208105146885,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 406.0,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03672113716602325,
|
|
"loss_d0": 0.03706220649182797,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 407.0,
|
|
"grad_norm": 0.244140625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03671231269836426,
|
|
"loss_d0": 0.03705513551831245,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 408.0,
|
|
"grad_norm": 0.2353515625,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.0371286153793335,
|
|
"loss_d0": 0.037407181411981585,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 409.0,
|
|
"grad_norm": 0.38671875,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.036925724148750304,
|
|
"loss_d0": 0.037227736040949824,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 410.0,
|
|
"grad_norm": 0.3125,
|
|
"learning_rate": 0.0002,
|
|
"loss": 0.03719911873340607,
|
|
"loss_d0": 0.03746572397649288,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 411.0,
|
|
"grad_norm": 0.31640625,
|
|
"learning_rate": 0.00019995559043291586,
|
|
"loss": 0.03746194541454315,
|
|
"loss_d0": 0.03776235654950142,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 412.0,
|
|
"grad_norm": 0.248046875,
|
|
"learning_rate": 0.0001998021321462845,
|
|
"loss": 0.03715557157993317,
|
|
"loss_d0": 0.03750314898788929,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 413.0,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 0.00019953926379459095,
|
|
"loss": 0.037144222855567934,
|
|
"loss_d0": 0.03752333410084248,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 414.0,
|
|
"grad_norm": 0.337890625,
|
|
"learning_rate": 0.00019916730564242994,
|
|
"loss": 0.037147408723831175,
|
|
"loss_d0": 0.03745650127530098,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 415.0,
|
|
"grad_norm": 0.62109375,
|
|
"learning_rate": 0.00019868671086351413,
|
|
"loss": 0.03663991689682007,
|
|
"loss_d0": 0.03694714643061161,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 416.0,
|
|
"grad_norm": 0.33984375,
|
|
"learning_rate": 0.00019809806498855166,
|
|
"loss": 0.03903592824935913,
|
|
"loss_d0": 0.03945017009973526,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 417.0,
|
|
"grad_norm": 0.8515625,
|
|
"learning_rate": 0.00019740208519186726,
|
|
"loss": 0.05429054498672485,
|
|
"loss_d0": 0.05633100271224976,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 418.0,
|
|
"grad_norm": 0.67578125,
|
|
"learning_rate": 0.0001965996194176357,
|
|
"loss": 0.05051107406616211,
|
|
"loss_d0": 0.05182218365371227,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 419.0,
|
|
"grad_norm": 0.357421875,
|
|
"learning_rate": 0.00019569164534679248,
|
|
"loss": 0.038011634349823,
|
|
"loss_d0": 0.038474849238991735,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 420.0,
|
|
"grad_norm": 0.318359375,
|
|
"learning_rate": 0.0001946792692058803,
|
|
"loss": 0.036821508407592775,
|
|
"loss_d0": 0.037107934802770616,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 421.0,
|
|
"grad_norm": 0.2890625,
|
|
"learning_rate": 0.00019356372441928221,
|
|
"loss": 0.03671710193157196,
|
|
"loss_d0": 0.03702742531895638,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 422.0,
|
|
"grad_norm": 0.267578125,
|
|
"learning_rate": 0.00019234637010648426,
|
|
"loss": 0.03749249279499054,
|
|
"loss_d0": 0.03791201822459698,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 423.0,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 0.00019102868942619743,
|
|
"loss": 0.039152055978775024,
|
|
"loss_d0": 0.0393472570925951,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 424.0,
|
|
"grad_norm": 0.310546875,
|
|
"learning_rate": 0.00018961228776935755,
|
|
"loss": 0.03920052945613861,
|
|
"loss_d0": 0.03954476937651634,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 425.0,
|
|
"grad_norm": 0.39453125,
|
|
"learning_rate": 0.00018809889080320357,
|
|
"loss": 0.04085721671581268,
|
|
"loss_d0": 0.0409322090446949,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 426.0,
|
|
"grad_norm": 0.419921875,
|
|
"learning_rate": 0.00018649034236881777,
|
|
"loss": 0.03923974931240082,
|
|
"loss_d0": 0.03957121372222901,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 427.0,
|
|
"grad_norm": 0.3828125,
|
|
"learning_rate": 0.00018478860223468955,
|
|
"loss": 0.03778021037578583,
|
|
"loss_d0": 0.03809101954102516,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 428.0,
|
|
"grad_norm": 0.46875,
|
|
"learning_rate": 0.0001829957437090394,
|
|
"loss": 0.03898613452911377,
|
|
"loss_d0": 0.03935887552797794,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 429.0,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 0.00018111395111381214,
|
|
"loss": 0.03973522186279297,
|
|
"loss_d0": 0.03986812345683575,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 430.0,
|
|
"grad_norm": 0.345703125,
|
|
"learning_rate": 0.00017914551712341713,
|
|
"loss": 0.038596144318580626,
|
|
"loss_d0": 0.03911666721105576,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 431.0,
|
|
"grad_norm": 0.279296875,
|
|
"learning_rate": 0.0001770928399714576,
|
|
"loss": 0.03771106004714966,
|
|
"loss_d0": 0.038051551580429076,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 432.0,
|
|
"grad_norm": 0.345703125,
|
|
"learning_rate": 0.0001749584205288526,
|
|
"loss": 0.03960946798324585,
|
|
"loss_d0": 0.03986733630299568,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 433.0,
|
|
"grad_norm": 0.57421875,
|
|
"learning_rate": 0.00017274485925691083,
|
|
"loss": 0.03941147327423096,
|
|
"loss_d0": 0.039736605063080785,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 434.0,
|
|
"grad_norm": 0.3671875,
|
|
"learning_rate": 0.00017045485303906913,
|
|
"loss": 0.0394733875989914,
|
|
"loss_d0": 0.03990803770720959,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 435.0,
|
|
"grad_norm": 0.3671875,
|
|
"learning_rate": 0.00016809119189515557,
|
|
"loss": 0.03905892372131348,
|
|
"loss_d0": 0.03944002017378807,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 436.0,
|
|
"grad_norm": 0.28125,
|
|
"learning_rate": 0.00016565675558217989,
|
|
"loss": 0.037955057621002194,
|
|
"loss_d0": 0.038193025067448615,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 437.0,
|
|
"grad_norm": 0.90625,
|
|
"learning_rate": 0.00016315451008579328,
|
|
"loss": 0.05242310762405396,
|
|
"loss_d0": 0.05061047412455082,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 438.0,
|
|
"grad_norm": 0.23828125,
|
|
"learning_rate": 0.00016058750400669178,
|
|
"loss": 0.0368131011724472,
|
|
"loss_d0": 0.03710653893649578,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 439.0,
|
|
"grad_norm": 0.423828125,
|
|
"learning_rate": 0.0001579588648463657,
|
|
"loss": 0.036599275469779965,
|
|
"loss_d0": 0.03693968802690506,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 440.0,
|
|
"grad_norm": 0.28125,
|
|
"learning_rate": 0.00015527179519672117,
|
|
"loss": 0.036560848355293274,
|
|
"loss_d0": 0.03687223196029663,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 441.0,
|
|
"grad_norm": 0.28125,
|
|
"learning_rate": 0.00015252956883821488,
|
|
"loss": 0.03625948429107666,
|
|
"loss_d0": 0.03659016117453575,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 442.0,
|
|
"grad_norm": 0.298828125,
|
|
"learning_rate": 0.00014973552675125708,
|
|
"loss": 0.036302709579467775,
|
|
"loss_d0": 0.03660444766283035,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 443.0,
|
|
"grad_norm": 0.275390625,
|
|
"learning_rate": 0.00014689307304574154,
|
|
"loss": 0.03645941019058228,
|
|
"loss_d0": 0.036814498528838155,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 444.0,
|
|
"grad_norm": 0.318359375,
|
|
"learning_rate": 0.00014400567081366205,
|
|
"loss": 0.03634356260299683,
|
|
"loss_d0": 0.03664385080337525,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 445.0,
|
|
"grad_norm": 0.296875,
|
|
"learning_rate": 0.00014107683790986813,
|
|
"loss": 0.03630726635456085,
|
|
"loss_d0": 0.03658915832638741,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 446.0,
|
|
"grad_norm": 0.283203125,
|
|
"learning_rate": 0.00013811014266610096,
|
|
"loss": 0.036189505457878114,
|
|
"loss_d0": 0.03651743419468403,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 447.0,
|
|
"grad_norm": 0.341796875,
|
|
"learning_rate": 0.00013510919954353066,
|
|
"loss": 0.03628252744674683,
|
|
"loss_d0": 0.03659649156033993,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 448.0,
|
|
"grad_norm": 0.2392578125,
|
|
"learning_rate": 0.00013207766472909225,
|
|
"loss": 0.03624842762947082,
|
|
"loss_d0": 0.0365591075271368,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 449.0,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 0.000129019231680985,
|
|
"loss": 0.03611701428890228,
|
|
"loss_d0": 0.03644072562456131,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 450.0,
|
|
"grad_norm": 0.3046875,
|
|
"learning_rate": 0.0001259376266287625,
|
|
"loss": 0.036150026321411136,
|
|
"loss_d0": 0.0364865392446518,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 450.0,
|
|
"eval_loss": 11.761486053466797,
|
|
"eval_runtime": 0.6889,
|
|
"eval_samples_per_second": 725.846,
|
|
"eval_steps_per_second": 72.585,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 451.0,
|
|
"grad_norm": 0.2734375,
|
|
"learning_rate": 0.00012283660403349607,
|
|
"loss": 0.036095789074897765,
|
|
"loss_d0": 0.03643478117883205,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 452.0,
|
|
"grad_norm": 0.2392578125,
|
|
"learning_rate": 0.00011971994201354204,
|
|
"loss": 0.03615381121635437,
|
|
"loss_d0": 0.036472433060407636,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 453.0,
|
|
"grad_norm": 0.267578125,
|
|
"learning_rate": 0.00011659143774148684,
|
|
"loss": 0.03610163033008575,
|
|
"loss_d0": 0.036404192447662354,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 454.0,
|
|
"grad_norm": 0.328125,
|
|
"learning_rate": 0.0001134549028178768,
|
|
"loss": 0.03613078892230988,
|
|
"loss_d0": 0.036461538076400755,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 455.0,
|
|
"grad_norm": 0.2333984375,
|
|
"learning_rate": 0.00011031415862737014,
|
|
"loss": 0.03611861169338226,
|
|
"loss_d0": 0.03640886433422565,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 456.0,
|
|
"grad_norm": 0.2470703125,
|
|
"learning_rate": 0.00010717303168296846,
|
|
"loss": 0.03604468107223511,
|
|
"loss_d0": 0.03640021868050099,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 457.0,
|
|
"grad_norm": 0.302734375,
|
|
"learning_rate": 0.000104035348964,
|
|
"loss": 0.036168360710144044,
|
|
"loss_d0": 0.036474670842289926,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 458.0,
|
|
"grad_norm": 0.2392578125,
|
|
"learning_rate": 0.00010090493325353484,
|
|
"loss": 0.03600202202796936,
|
|
"loss_d0": 0.03632246777415275,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 459.0,
|
|
"grad_norm": 0.3671875,
|
|
"learning_rate": 9.778559848091261e-05,
|
|
"loss": 0.03613144755363464,
|
|
"loss_d0": 0.03646283820271492,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 460.0,
|
|
"grad_norm": 0.2734375,
|
|
"learning_rate": 9.468114507505707e-05,
|
|
"loss": 0.03605700135231018,
|
|
"loss_d0": 0.03638906553387642,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 461.0,
|
|
"grad_norm": 0.251953125,
|
|
"learning_rate": 9.15953553342389e-05,
|
|
"loss": 0.035967972874641416,
|
|
"loss_d0": 0.036280662193894385,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 462.0,
|
|
"grad_norm": 0.283203125,
|
|
"learning_rate": 8.853198881792772e-05,
|
|
"loss": 0.03607074022293091,
|
|
"loss_d0": 0.036401886865496634,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 463.0,
|
|
"grad_norm": 0.2392578125,
|
|
"learning_rate": 8.549477776634832e-05,
|
|
"loss": 0.0359768807888031,
|
|
"loss_d0": 0.0362836092710495,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 464.0,
|
|
"grad_norm": 0.314453125,
|
|
"learning_rate": 8.24874225533205e-05,
|
|
"loss": 0.03588842451572418,
|
|
"loss_d0": 0.03622284643352032,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 465.0,
|
|
"grad_norm": 0.34375,
|
|
"learning_rate": 7.951358717792378e-05,
|
|
"loss": 0.03593695759773254,
|
|
"loss_d0": 0.036245567724108696,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 466.0,
|
|
"grad_norm": 0.2578125,
|
|
"learning_rate": 7.657689480047888e-05,
|
|
"loss": 0.03589689433574676,
|
|
"loss_d0": 0.03622194863855839,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 467.0,
|
|
"grad_norm": 0.28125,
|
|
"learning_rate": 7.368092332828491e-05,
|
|
"loss": 0.03584821224212646,
|
|
"loss_d0": 0.03617323003709316,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 468.0,
|
|
"grad_norm": 0.33984375,
|
|
"learning_rate": 7.082920105649054e-05,
|
|
"loss": 0.03588172793388367,
|
|
"loss_d0": 0.03619707673788071,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 469.0,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 6.80252023694098e-05,
|
|
"loss": 0.03584883213043213,
|
|
"loss_d0": 0.03617900386452675,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 470.0,
|
|
"grad_norm": 0.3359375,
|
|
"learning_rate": 6.527234350752003e-05,
|
|
"loss": 0.035852047801017764,
|
|
"loss_d0": 0.0361775953322649,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 471.0,
|
|
"grad_norm": 0.275390625,
|
|
"learning_rate": 6.257397840529903e-05,
|
|
"loss": 0.03582252562046051,
|
|
"loss_d0": 0.03615486063063145,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 472.0,
|
|
"grad_norm": 0.2734375,
|
|
"learning_rate": 5.993339460497257e-05,
|
|
"loss": 0.03581757247447968,
|
|
"loss_d0": 0.036142122372984885,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 473.0,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 5.7353809251150606e-05,
|
|
"loss": 0.0358079195022583,
|
|
"loss_d0": 0.036134665831923485,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 474.0,
|
|
"grad_norm": 0.2373046875,
|
|
"learning_rate": 5.483836517123214e-05,
|
|
"loss": 0.035815265774726865,
|
|
"loss_d0": 0.036152683570981024,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 475.0,
|
|
"grad_norm": 0.322265625,
|
|
"learning_rate": 5.239012704635402e-05,
|
|
"loss": 0.03577219545841217,
|
|
"loss_d0": 0.036099201813340184,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 476.0,
|
|
"grad_norm": 0.271484375,
|
|
"learning_rate": 5.0012077677549283e-05,
|
|
"loss": 0.03577747642993927,
|
|
"loss_d0": 0.03610123656690121,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 477.0,
|
|
"grad_norm": 0.23828125,
|
|
"learning_rate": 4.77071143516634e-05,
|
|
"loss": 0.03580273985862732,
|
|
"loss_d0": 0.03613555021584034,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 478.0,
|
|
"grad_norm": 0.27734375,
|
|
"learning_rate": 4.547804531145656e-05,
|
|
"loss": 0.035796952247619626,
|
|
"loss_d0": 0.036111927777528766,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 479.0,
|
|
"grad_norm": 0.35546875,
|
|
"learning_rate": 4.332758633419252e-05,
|
|
"loss": 0.035767361521720886,
|
|
"loss_d0": 0.03609406426548958,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 480.0,
|
|
"grad_norm": 0.265625,
|
|
"learning_rate": 4.12583574228822e-05,
|
|
"loss": 0.03574168682098389,
|
|
"loss_d0": 0.03606498539447785,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 481.0,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 3.927287961421382e-05,
|
|
"loss": 0.035773900151252744,
|
|
"loss_d0": 0.03608821220695972,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 482.0,
|
|
"grad_norm": 0.275390625,
|
|
"learning_rate": 3.737357190705782e-05,
|
|
"loss": 0.03574726283550263,
|
|
"loss_d0": 0.03607319518923759,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 483.0,
|
|
"grad_norm": 0.287109375,
|
|
"learning_rate": 3.556274831528945e-05,
|
|
"loss": 0.03574813306331635,
|
|
"loss_d0": 0.0360788069665432,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 484.0,
|
|
"grad_norm": 0.26953125,
|
|
"learning_rate": 3.3842615048519255e-05,
|
|
"loss": 0.03571727573871612,
|
|
"loss_d0": 0.03603735640645027,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 485.0,
|
|
"grad_norm": 0.263671875,
|
|
"learning_rate": 3.221526782416659e-05,
|
|
"loss": 0.035741984844207764,
|
|
"loss_d0": 0.0360604640096426,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 486.0,
|
|
"grad_norm": 0.30859375,
|
|
"learning_rate": 3.068268931415069e-05,
|
|
"loss": 0.035722389817237854,
|
|
"loss_d0": 0.03604618720710277,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 487.0,
|
|
"grad_norm": 0.259765625,
|
|
"learning_rate": 2.9246746729310446e-05,
|
|
"loss": 0.03571443259716034,
|
|
"loss_d0": 0.03603012822568417,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 488.0,
|
|
"grad_norm": 0.2275390625,
|
|
"learning_rate": 2.7909189544495435e-05,
|
|
"loss": 0.03573389947414398,
|
|
"loss_d0": 0.036041321232914925,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 489.0,
|
|
"grad_norm": 0.240234375,
|
|
"learning_rate": 2.6671647367100477e-05,
|
|
"loss": 0.035701331496238706,
|
|
"loss_d0": 0.03603383935987949,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 490.0,
|
|
"grad_norm": 0.328125,
|
|
"learning_rate": 2.553562795163998e-05,
|
|
"loss": 0.035741209983825684,
|
|
"loss_d0": 0.0360419649630785,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 491.0,
|
|
"grad_norm": 0.30078125,
|
|
"learning_rate": 2.450251536278129e-05,
|
|
"loss": 0.035731592774391176,
|
|
"loss_d0": 0.0360304169356823,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 492.0,
|
|
"grad_norm": 0.244140625,
|
|
"learning_rate": 2.3573568289075136e-05,
|
|
"loss": 0.03570793569087982,
|
|
"loss_d0": 0.036030732467770575,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 493.0,
|
|
"grad_norm": 0.302734375,
|
|
"learning_rate": 2.2749918509437493e-05,
|
|
"loss": 0.03569709360599518,
|
|
"loss_d0": 0.03602620549499989,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 494.0,
|
|
"grad_norm": 0.2294921875,
|
|
"learning_rate": 2.2032569514251373e-05,
|
|
"loss": 0.03570819199085236,
|
|
"loss_d0": 0.03603471517562866,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 495.0,
|
|
"grad_norm": 0.294921875,
|
|
"learning_rate": 2.1422395282768234e-05,
|
|
"loss": 0.035699674487113954,
|
|
"loss_d0": 0.03603287264704704,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 496.0,
|
|
"grad_norm": 0.32421875,
|
|
"learning_rate": 2.092013921829899e-05,
|
|
"loss": 0.03576536178588867,
|
|
"loss_d0": 0.03607108183205128,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 497.0,
|
|
"grad_norm": 0.232421875,
|
|
"learning_rate": 2.0526413242491617e-05,
|
|
"loss": 0.035713717341423035,
|
|
"loss_d0": 0.03603534735739231,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 498.0,
|
|
"grad_norm": 0.337890625,
|
|
"learning_rate": 2.0241697049798773e-05,
|
|
"loss": 0.03570127785205841,
|
|
"loss_d0": 0.03601216375827789,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 499.0,
|
|
"grad_norm": 0.2421875,
|
|
"learning_rate": 2.0066337523044098e-05,
|
|
"loss": 0.03573695719242096,
|
|
"loss_d0": 0.03605118878185749,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 500.0,
|
|
"grad_norm": 0.255859375,
|
|
"learning_rate": 2.0000548310798866e-05,
|
|
"loss": 0.03572871088981629,
|
|
"loss_d0": 0.03601981587707996,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 500.0,
|
|
"eval_loss": 12.606892585754395,
|
|
"eval_runtime": 0.6871,
|
|
"eval_samples_per_second": 727.659,
|
|
"eval_steps_per_second": 72.766,
|
|
"step": 5000
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 5000,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 500,
|
|
"save_steps": 1000,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 5.980892276588544e+16,
|
|
"train_batch_size": 10,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|