1934 lines
50 KiB
JSON
1934 lines
50 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 3.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 2700,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.011118832522585128,
|
||
|
|
"grad_norm": 3.517343759536743,
|
||
|
|
"learning_rate": 1.3333333333333334e-06,
|
||
|
|
"loss": 0.8242700576782227,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.022237665045170257,
|
||
|
|
"grad_norm": 0.8755150437355042,
|
||
|
|
"learning_rate": 2.814814814814815e-06,
|
||
|
|
"loss": 0.7332224369049072,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03335649756775538,
|
||
|
|
"grad_norm": 0.5096220970153809,
|
||
|
|
"learning_rate": 4.296296296296296e-06,
|
||
|
|
"loss": 0.6391815185546875,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04447533009034051,
|
||
|
|
"grad_norm": 0.39251089096069336,
|
||
|
|
"learning_rate": 5.777777777777778e-06,
|
||
|
|
"loss": 0.5873191356658936,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05559416261292564,
|
||
|
|
"grad_norm": 0.2596520781517029,
|
||
|
|
"learning_rate": 7.2592592592592605e-06,
|
||
|
|
"loss": 0.5601994514465332,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06671299513551077,
|
||
|
|
"grad_norm": 0.21380962431430817,
|
||
|
|
"learning_rate": 8.740740740740741e-06,
|
||
|
|
"loss": 0.538310432434082,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0778318276580959,
|
||
|
|
"grad_norm": 0.2109403759241104,
|
||
|
|
"learning_rate": 1.0222222222222223e-05,
|
||
|
|
"loss": 0.5150705337524414,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08895066018068103,
|
||
|
|
"grad_norm": 0.2186734825372696,
|
||
|
|
"learning_rate": 1.1703703703703703e-05,
|
||
|
|
"loss": 0.5032827377319335,
|
||
|
|
"step": 80
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10006949270326616,
|
||
|
|
"grad_norm": 0.2625578045845032,
|
||
|
|
"learning_rate": 1.3185185185185185e-05,
|
||
|
|
"loss": 0.49479260444641116,
|
||
|
|
"step": 90
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11118832522585129,
|
||
|
|
"grad_norm": 0.2372923344373703,
|
||
|
|
"learning_rate": 1.4666666666666666e-05,
|
||
|
|
"loss": 0.49692888259887696,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12230715774843641,
|
||
|
|
"grad_norm": 0.3200058043003082,
|
||
|
|
"learning_rate": 1.614814814814815e-05,
|
||
|
|
"loss": 0.47324380874633787,
|
||
|
|
"step": 110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13342599027102153,
|
||
|
|
"grad_norm": 0.2912400960922241,
|
||
|
|
"learning_rate": 1.7629629629629633e-05,
|
||
|
|
"loss": 0.4717508316040039,
|
||
|
|
"step": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14454482279360667,
|
||
|
|
"grad_norm": 0.35332974791526794,
|
||
|
|
"learning_rate": 1.9111111111111113e-05,
|
||
|
|
"loss": 0.4714301586151123,
|
||
|
|
"step": 130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1556636553161918,
|
||
|
|
"grad_norm": 0.3428114950656891,
|
||
|
|
"learning_rate": 2.0592592592592596e-05,
|
||
|
|
"loss": 0.4707786083221436,
|
||
|
|
"step": 140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16678248783877692,
|
||
|
|
"grad_norm": 0.23904399573802948,
|
||
|
|
"learning_rate": 2.2074074074074073e-05,
|
||
|
|
"loss": 0.4618192672729492,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17790132036136205,
|
||
|
|
"grad_norm": 0.3916633725166321,
|
||
|
|
"learning_rate": 2.355555555555556e-05,
|
||
|
|
"loss": 0.45795350074768065,
|
||
|
|
"step": 160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1890201528839472,
|
||
|
|
"grad_norm": 0.291522741317749,
|
||
|
|
"learning_rate": 2.5037037037037036e-05,
|
||
|
|
"loss": 0.4513235569000244,
|
||
|
|
"step": 170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20013898540653233,
|
||
|
|
"grad_norm": 0.3826625645160675,
|
||
|
|
"learning_rate": 2.651851851851852e-05,
|
||
|
|
"loss": 0.4494623184204102,
|
||
|
|
"step": 180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21125781792911744,
|
||
|
|
"grad_norm": 0.2857173979282379,
|
||
|
|
"learning_rate": 2.8e-05,
|
||
|
|
"loss": 0.4489255428314209,
|
||
|
|
"step": 190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22237665045170257,
|
||
|
|
"grad_norm": 0.2880389392375946,
|
||
|
|
"learning_rate": 2.9481481481481483e-05,
|
||
|
|
"loss": 0.44628753662109377,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2334954829742877,
|
||
|
|
"grad_norm": 0.2952287495136261,
|
||
|
|
"learning_rate": 3.096296296296296e-05,
|
||
|
|
"loss": 0.44461727142333984,
|
||
|
|
"step": 210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24461431549687282,
|
||
|
|
"grad_norm": 0.30243533849716187,
|
||
|
|
"learning_rate": 3.2444444444444446e-05,
|
||
|
|
"loss": 0.4425827503204346,
|
||
|
|
"step": 220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25573314801945796,
|
||
|
|
"grad_norm": 0.308292418718338,
|
||
|
|
"learning_rate": 3.392592592592593e-05,
|
||
|
|
"loss": 0.4386408805847168,
|
||
|
|
"step": 230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26685198054204307,
|
||
|
|
"grad_norm": 0.3824830651283264,
|
||
|
|
"learning_rate": 3.540740740740741e-05,
|
||
|
|
"loss": 0.43536901473999023,
|
||
|
|
"step": 240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27797081306462823,
|
||
|
|
"grad_norm": 0.32702672481536865,
|
||
|
|
"learning_rate": 3.6888888888888896e-05,
|
||
|
|
"loss": 0.43506660461425783,
|
||
|
|
"step": 250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28908964558721334,
|
||
|
|
"grad_norm": 0.349981427192688,
|
||
|
|
"learning_rate": 3.837037037037037e-05,
|
||
|
|
"loss": 0.43353118896484377,
|
||
|
|
"step": 260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30020847810979845,
|
||
|
|
"grad_norm": 0.26233726739883423,
|
||
|
|
"learning_rate": 3.9851851851851856e-05,
|
||
|
|
"loss": 0.4353696346282959,
|
||
|
|
"step": 270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3113273106323836,
|
||
|
|
"grad_norm": 0.3655954599380493,
|
||
|
|
"learning_rate": 3.999864616007525e-05,
|
||
|
|
"loss": 0.4296071529388428,
|
||
|
|
"step": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3224461431549687,
|
||
|
|
"grad_norm": 0.2861635386943817,
|
||
|
|
"learning_rate": 3.999396645490857e-05,
|
||
|
|
"loss": 0.4362457275390625,
|
||
|
|
"step": 290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33356497567755383,
|
||
|
|
"grad_norm": 0.27363601326942444,
|
||
|
|
"learning_rate": 3.998594495240786e-05,
|
||
|
|
"loss": 0.4307443618774414,
|
||
|
|
"step": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.344683808200139,
|
||
|
|
"grad_norm": 0.2889789044857025,
|
||
|
|
"learning_rate": 3.997458299328936e-05,
|
||
|
|
"loss": 0.4261889934539795,
|
||
|
|
"step": 310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3558026407227241,
|
||
|
|
"grad_norm": 0.2857920229434967,
|
||
|
|
"learning_rate": 3.99598824765942e-05,
|
||
|
|
"loss": 0.4217526435852051,
|
||
|
|
"step": 320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3669214732453092,
|
||
|
|
"grad_norm": 0.24713562428951263,
|
||
|
|
"learning_rate": 3.994184585937099e-05,
|
||
|
|
"loss": 0.4260251998901367,
|
||
|
|
"step": 330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3780403057678944,
|
||
|
|
"grad_norm": 0.321676641702652,
|
||
|
|
"learning_rate": 3.992047615626516e-05,
|
||
|
|
"loss": 0.4209277153015137,
|
||
|
|
"step": 340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3891591382904795,
|
||
|
|
"grad_norm": 0.32741880416870117,
|
||
|
|
"learning_rate": 3.989577693901505e-05,
|
||
|
|
"loss": 0.4218775272369385,
|
||
|
|
"step": 350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.40027797081306465,
|
||
|
|
"grad_norm": 0.25548964738845825,
|
||
|
|
"learning_rate": 3.986775233585499e-05,
|
||
|
|
"loss": 0.42417049407958984,
|
||
|
|
"step": 360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41139680333564976,
|
||
|
|
"grad_norm": 0.2476252168416977,
|
||
|
|
"learning_rate": 3.983640703082523e-05,
|
||
|
|
"loss": 0.41838369369506834,
|
||
|
|
"step": 370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4225156358582349,
|
||
|
|
"grad_norm": 0.274328351020813,
|
||
|
|
"learning_rate": 3.980174626298914e-05,
|
||
|
|
"loss": 0.41819052696228026,
|
||
|
|
"step": 380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.43363446838082004,
|
||
|
|
"grad_norm": 0.3162706196308136,
|
||
|
|
"learning_rate": 3.9763775825557476e-05,
|
||
|
|
"loss": 0.4195350170135498,
|
||
|
|
"step": 390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44475330090340515,
|
||
|
|
"grad_norm": 0.28077232837677,
|
||
|
|
"learning_rate": 3.9722502064920105e-05,
|
||
|
|
"loss": 0.4215540885925293,
|
||
|
|
"step": 400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45587213342599026,
|
||
|
|
"grad_norm": 0.24924027919769287,
|
||
|
|
"learning_rate": 3.967793187958533e-05,
|
||
|
|
"loss": 0.41279850006103513,
|
||
|
|
"step": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4669909659485754,
|
||
|
|
"grad_norm": 0.2332940250635147,
|
||
|
|
"learning_rate": 3.9630072719026816e-05,
|
||
|
|
"loss": 0.4138369560241699,
|
||
|
|
"step": 420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47810979847116053,
|
||
|
|
"grad_norm": 0.25689420104026794,
|
||
|
|
"learning_rate": 3.957893258243849e-05,
|
||
|
|
"loss": 0.41005849838256836,
|
||
|
|
"step": 430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48922863099374564,
|
||
|
|
"grad_norm": 0.2200179100036621,
|
||
|
|
"learning_rate": 3.9524520017397574e-05,
|
||
|
|
"loss": 0.4146875858306885,
|
||
|
|
"step": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5003474635163307,
|
||
|
|
"grad_norm": 0.2616479694843292,
|
||
|
|
"learning_rate": 3.946684411843591e-05,
|
||
|
|
"loss": 0.4060019016265869,
|
||
|
|
"step": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5114662960389159,
|
||
|
|
"grad_norm": 0.250016450881958,
|
||
|
|
"learning_rate": 3.940591452551993e-05,
|
||
|
|
"loss": 0.40746545791625977,
|
||
|
|
"step": 460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5225851285615011,
|
||
|
|
"grad_norm": 0.24024935066699982,
|
||
|
|
"learning_rate": 3.9341741422439416e-05,
|
||
|
|
"loss": 0.4112556457519531,
|
||
|
|
"step": 470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5337039610840861,
|
||
|
|
"grad_norm": 0.2836955189704895,
|
||
|
|
"learning_rate": 3.9274335535105373e-05,
|
||
|
|
"loss": 0.4094208240509033,
|
||
|
|
"step": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5448227936066713,
|
||
|
|
"grad_norm": 0.24973008036613464,
|
||
|
|
"learning_rate": 3.920370812975729e-05,
|
||
|
|
"loss": 0.41716728210449217,
|
||
|
|
"step": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5559416261292565,
|
||
|
|
"grad_norm": 0.25282275676727295,
|
||
|
|
"learning_rate": 3.912987101108012e-05,
|
||
|
|
"loss": 0.4076206684112549,
|
||
|
|
"step": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5670604586518415,
|
||
|
|
"grad_norm": 0.2290191948413849,
|
||
|
|
"learning_rate": 3.905283652023123e-05,
|
||
|
|
"loss": 0.40010957717895507,
|
||
|
|
"step": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5781792911744267,
|
||
|
|
"grad_norm": 0.2107991874217987,
|
||
|
|
"learning_rate": 3.8972617532777686e-05,
|
||
|
|
"loss": 0.4060469627380371,
|
||
|
|
"step": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5892981236970118,
|
||
|
|
"grad_norm": 0.23016655445098877,
|
||
|
|
"learning_rate": 3.8889227456544254e-05,
|
||
|
|
"loss": 0.4046156883239746,
|
||
|
|
"step": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6004169562195969,
|
||
|
|
"grad_norm": 0.20803174376487732,
|
||
|
|
"learning_rate": 3.8802680229372374e-05,
|
||
|
|
"loss": 0.40668258666992185,
|
||
|
|
"step": 540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6115357887421821,
|
||
|
|
"grad_norm": 0.23622117936611176,
|
||
|
|
"learning_rate": 3.8712990316790633e-05,
|
||
|
|
"loss": 0.4025533676147461,
|
||
|
|
"step": 550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6226546212647672,
|
||
|
|
"grad_norm": 0.20919625461101532,
|
||
|
|
"learning_rate": 3.862017270959694e-05,
|
||
|
|
"loss": 0.4043358325958252,
|
||
|
|
"step": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6337734537873523,
|
||
|
|
"grad_norm": 0.22993041574954987,
|
||
|
|
"learning_rate": 3.8524242921353e-05,
|
||
|
|
"loss": 0.3996579170227051,
|
||
|
|
"step": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6448922863099374,
|
||
|
|
"grad_norm": 0.2250276803970337,
|
||
|
|
"learning_rate": 3.842521698579136e-05,
|
||
|
|
"loss": 0.39752275943756105,
|
||
|
|
"step": 580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6560111188325226,
|
||
|
|
"grad_norm": 0.22397875785827637,
|
||
|
|
"learning_rate": 3.832311145413552e-05,
|
||
|
|
"loss": 0.39977571964263914,
|
||
|
|
"step": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6671299513551077,
|
||
|
|
"grad_norm": 0.22084222733974457,
|
||
|
|
"learning_rate": 3.8217943392333555e-05,
|
||
|
|
"loss": 0.3992427349090576,
|
||
|
|
"step": 600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6782487838776928,
|
||
|
|
"grad_norm": 0.2924990653991699,
|
||
|
|
"learning_rate": 3.810973037820572e-05,
|
||
|
|
"loss": 0.4037761211395264,
|
||
|
|
"step": 610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.689367616400278,
|
||
|
|
"grad_norm": 0.2066657841205597,
|
||
|
|
"learning_rate": 3.7998490498506494e-05,
|
||
|
|
"loss": 0.3975439310073853,
|
||
|
|
"step": 620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.700486448922863,
|
||
|
|
"grad_norm": 0.19355815649032593,
|
||
|
|
"learning_rate": 3.788424234590153e-05,
|
||
|
|
"loss": 0.4000706195831299,
|
||
|
|
"step": 630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7116052814454482,
|
||
|
|
"grad_norm": 0.1997012048959732,
|
||
|
|
"learning_rate": 3.776700501586009e-05,
|
||
|
|
"loss": 0.3945141792297363,
|
||
|
|
"step": 640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7227241139680334,
|
||
|
|
"grad_norm": 0.2134384959936142,
|
||
|
|
"learning_rate": 3.7646798103463395e-05,
|
||
|
|
"loss": 0.39577999114990237,
|
||
|
|
"step": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7338429464906184,
|
||
|
|
"grad_norm": 0.24351662397384644,
|
||
|
|
"learning_rate": 3.752364170012956e-05,
|
||
|
|
"loss": 0.39435544013977053,
|
||
|
|
"step": 660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7449617790132036,
|
||
|
|
"grad_norm": 0.2532612383365631,
|
||
|
|
"learning_rate": 3.739755639025543e-05,
|
||
|
|
"loss": 0.3988224506378174,
|
||
|
|
"step": 670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7560806115357888,
|
||
|
|
"grad_norm": 0.24978569149971008,
|
||
|
|
"learning_rate": 3.726856324777616e-05,
|
||
|
|
"loss": 0.3910386085510254,
|
||
|
|
"step": 680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7671994440583739,
|
||
|
|
"grad_norm": 0.25254756212234497,
|
||
|
|
"learning_rate": 3.713668383264288e-05,
|
||
|
|
"loss": 0.38977618217468263,
|
||
|
|
"step": 690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.778318276580959,
|
||
|
|
"grad_norm": 0.22012752294540405,
|
||
|
|
"learning_rate": 3.700194018721916e-05,
|
||
|
|
"loss": 0.38769371509552003,
|
||
|
|
"step": 700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7894371091035441,
|
||
|
|
"grad_norm": 0.22096213698387146,
|
||
|
|
"learning_rate": 3.686435483259686e-05,
|
||
|
|
"loss": 0.39794325828552246,
|
||
|
|
"step": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8005559416261293,
|
||
|
|
"grad_norm": 0.2342117577791214,
|
||
|
|
"learning_rate": 3.672395076483192e-05,
|
||
|
|
"loss": 0.39336109161376953,
|
||
|
|
"step": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8116747741487144,
|
||
|
|
"grad_norm": 0.2648347020149231,
|
||
|
|
"learning_rate": 3.658075145110083e-05,
|
||
|
|
"loss": 0.3947201490402222,
|
||
|
|
"step": 730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8227936066712995,
|
||
|
|
"grad_norm": 0.23970480263233185,
|
||
|
|
"learning_rate": 3.64347808257783e-05,
|
||
|
|
"loss": 0.39340207576751707,
|
||
|
|
"step": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8339124391938847,
|
||
|
|
"grad_norm": 0.23063965141773224,
|
||
|
|
"learning_rate": 3.6286063286436826e-05,
|
||
|
|
"loss": 0.3845417261123657,
|
||
|
|
"step": 750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8450312717164697,
|
||
|
|
"grad_norm": 0.20158712565898895,
|
||
|
|
"learning_rate": 3.613462368976894e-05,
|
||
|
|
"loss": 0.3933848857879639,
|
||
|
|
"step": 760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8561501042390549,
|
||
|
|
"grad_norm": 0.20051920413970947,
|
||
|
|
"learning_rate": 3.598048734743262e-05,
|
||
|
|
"loss": 0.3893456935882568,
|
||
|
|
"step": 770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8672689367616401,
|
||
|
|
"grad_norm": 0.2043953388929367,
|
||
|
|
"learning_rate": 3.58236800218207e-05,
|
||
|
|
"loss": 0.38582921028137207,
|
||
|
|
"step": 780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8783877692842251,
|
||
|
|
"grad_norm": 0.20056948065757751,
|
||
|
|
"learning_rate": 3.566422792175489e-05,
|
||
|
|
"loss": 0.38297524452209475,
|
||
|
|
"step": 790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8895066018068103,
|
||
|
|
"grad_norm": 0.19881410896778107,
|
||
|
|
"learning_rate": 3.550215769810532e-05,
|
||
|
|
"loss": 0.3857751369476318,
|
||
|
|
"step": 800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9006254343293955,
|
||
|
|
"grad_norm": 0.2066773623228073,
|
||
|
|
"learning_rate": 3.5337496439336014e-05,
|
||
|
|
"loss": 0.392465877532959,
|
||
|
|
"step": 810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9117442668519805,
|
||
|
|
"grad_norm": 0.20534034073352814,
|
||
|
|
"learning_rate": 3.5170271666977383e-05,
|
||
|
|
"loss": 0.3881709098815918,
|
||
|
|
"step": 820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9228630993745657,
|
||
|
|
"grad_norm": 0.1855618804693222,
|
||
|
|
"learning_rate": 3.5000511331026224e-05,
|
||
|
|
"loss": 0.3881243705749512,
|
||
|
|
"step": 830
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9339819318971508,
|
||
|
|
"grad_norm": 0.18303897976875305,
|
||
|
|
"learning_rate": 3.4828243805274166e-05,
|
||
|
|
"loss": 0.38112101554870603,
|
||
|
|
"step": 840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9451007644197359,
|
||
|
|
"grad_norm": 0.1936378926038742,
|
||
|
|
"learning_rate": 3.4653497882565276e-05,
|
||
|
|
"loss": 0.38739733695983886,
|
||
|
|
"step": 850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9562195969423211,
|
||
|
|
"grad_norm": 0.2301376312971115,
|
||
|
|
"learning_rate": 3.44763027699836e-05,
|
||
|
|
"loss": 0.3910486936569214,
|
||
|
|
"step": 860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9673384294649062,
|
||
|
|
"grad_norm": 0.20131757855415344,
|
||
|
|
"learning_rate": 3.429668808397147e-05,
|
||
|
|
"loss": 0.3879512071609497,
|
||
|
|
"step": 870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9784572619874913,
|
||
|
|
"grad_norm": 0.20710399746894836,
|
||
|
|
"learning_rate": 3.4114683845379465e-05,
|
||
|
|
"loss": 0.38755533695220945,
|
||
|
|
"step": 880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9895760945100764,
|
||
|
|
"grad_norm": 0.2099325954914093,
|
||
|
|
"learning_rate": 3.393032047444862e-05,
|
||
|
|
"loss": 0.3850740432739258,
|
||
|
|
"step": 890
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"grad_norm": 0.25888943672180176,
|
||
|
|
"learning_rate": 3.3743628785726075e-05,
|
||
|
|
"loss": 0.3849788665771484,
|
||
|
|
"step": 900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.011118832522585,
|
||
|
|
"grad_norm": 0.22799654304981232,
|
||
|
|
"learning_rate": 3.355463998291465e-05,
|
||
|
|
"loss": 0.34186859130859376,
|
||
|
|
"step": 910
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0222376650451703,
|
||
|
|
"grad_norm": 0.22239384055137634,
|
||
|
|
"learning_rate": 3.3363385653657486e-05,
|
||
|
|
"loss": 0.3563653230667114,
|
||
|
|
"step": 920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0333564975677554,
|
||
|
|
"grad_norm": 0.2089320719242096,
|
||
|
|
"learning_rate": 3.3169897764258476e-05,
|
||
|
|
"loss": 0.34113943576812744,
|
||
|
|
"step": 930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0444753300903404,
|
||
|
|
"grad_norm": 0.28375443816185,
|
||
|
|
"learning_rate": 3.2974208654339405e-05,
|
||
|
|
"loss": 0.33684582710266114,
|
||
|
|
"step": 940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0555941626129257,
|
||
|
|
"grad_norm": 0.21052372455596924,
|
||
|
|
"learning_rate": 3.277635103143467e-05,
|
||
|
|
"loss": 0.33628795146942136,
|
||
|
|
"step": 950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0667129951355108,
|
||
|
|
"grad_norm": 0.20992518961429596,
|
||
|
|
"learning_rate": 3.2576357965524574e-05,
|
||
|
|
"loss": 0.3357236862182617,
|
||
|
|
"step": 960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0778318276580958,
|
||
|
|
"grad_norm": 0.22243370115756989,
|
||
|
|
"learning_rate": 3.237426288350798e-05,
|
||
|
|
"loss": 0.33824107646942136,
|
||
|
|
"step": 970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.088950660180681,
|
||
|
|
"grad_norm": 0.2306300699710846,
|
||
|
|
"learning_rate": 3.217009956361531e-05,
|
||
|
|
"loss": 0.34063472747802737,
|
||
|
|
"step": 980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1000694927032661,
|
||
|
|
"grad_norm": 0.19953912496566772,
|
||
|
|
"learning_rate": 3.196390212976291e-05,
|
||
|
|
"loss": 0.34016103744506837,
|
||
|
|
"step": 990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1111883252258512,
|
||
|
|
"grad_norm": 0.2101643830537796,
|
||
|
|
"learning_rate": 3.1755705045849465e-05,
|
||
|
|
"loss": 0.3383723974227905,
|
||
|
|
"step": 1000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1223071577484365,
|
||
|
|
"grad_norm": 0.20528212189674377,
|
||
|
|
"learning_rate": 3.154554310999578e-05,
|
||
|
|
"loss": 0.33558709621429444,
|
||
|
|
"step": 1010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1334259902710215,
|
||
|
|
"grad_norm": 0.18450650572776794,
|
||
|
|
"learning_rate": 3.133345144872859e-05,
|
||
|
|
"loss": 0.3336428165435791,
|
||
|
|
"step": 1020
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1445448227936066,
|
||
|
|
"grad_norm": 0.18666331470012665,
|
||
|
|
"learning_rate": 3.111946551110947e-05,
|
||
|
|
"loss": 0.33807053565979006,
|
||
|
|
"step": 1030
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1556636553161919,
|
||
|
|
"grad_norm": 0.20218265056610107,
|
||
|
|
"learning_rate": 3.09036210628099e-05,
|
||
|
|
"loss": 0.33912909030914307,
|
||
|
|
"step": 1040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.166782487838777,
|
||
|
|
"grad_norm": 0.18102531135082245,
|
||
|
|
"learning_rate": 3.068595418013339e-05,
|
||
|
|
"loss": 0.3401691436767578,
|
||
|
|
"step": 1050
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.177901320361362,
|
||
|
|
"grad_norm": 0.1858808994293213,
|
||
|
|
"learning_rate": 3.046650124398562e-05,
|
||
|
|
"loss": 0.3396461963653564,
|
||
|
|
"step": 1060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.1890201528839472,
|
||
|
|
"grad_norm": 0.21451160311698914,
|
||
|
|
"learning_rate": 3.0245298933793798e-05,
|
||
|
|
"loss": 0.3356295108795166,
|
||
|
|
"step": 1070
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2001389854065323,
|
||
|
|
"grad_norm": 0.24296917021274567,
|
||
|
|
"learning_rate": 3.0022384221376017e-05,
|
||
|
|
"loss": 0.3362587928771973,
|
||
|
|
"step": 1080
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2112578179291174,
|
||
|
|
"grad_norm": 0.22454988956451416,
|
||
|
|
"learning_rate": 2.9797794364761743e-05,
|
||
|
|
"loss": 0.33704962730407717,
|
||
|
|
"step": 1090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2223766504517026,
|
||
|
|
"grad_norm": 0.20934318006038666,
|
||
|
|
"learning_rate": 2.9571566901964552e-05,
|
||
|
|
"loss": 0.3362391471862793,
|
||
|
|
"step": 1100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2334954829742877,
|
||
|
|
"grad_norm": 0.20203141868114471,
|
||
|
|
"learning_rate": 2.9343739644707986e-05,
|
||
|
|
"loss": 0.33880271911621096,
|
||
|
|
"step": 1110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2446143154968727,
|
||
|
|
"grad_norm": 0.21586163341999054,
|
||
|
|
"learning_rate": 2.911435067210569e-05,
|
||
|
|
"loss": 0.3303499698638916,
|
||
|
|
"step": 1120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.255733148019458,
|
||
|
|
"grad_norm": 0.21403352916240692,
|
||
|
|
"learning_rate": 2.8883438324296866e-05,
|
||
|
|
"loss": 0.3369396686553955,
|
||
|
|
"step": 1130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.266851980542043,
|
||
|
|
"grad_norm": 0.2058866173028946,
|
||
|
|
"learning_rate": 2.8651041196038098e-05,
|
||
|
|
"loss": 0.33591766357421876,
|
||
|
|
"step": 1140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2779708130646283,
|
||
|
|
"grad_norm": 0.1970527321100235,
|
||
|
|
"learning_rate": 2.8417198130252584e-05,
|
||
|
|
"loss": 0.337198281288147,
|
||
|
|
"step": 1150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.2890896455872134,
|
||
|
|
"grad_norm": 0.2015780508518219,
|
||
|
|
"learning_rate": 2.8181948211537954e-05,
|
||
|
|
"loss": 0.33505361080169677,
|
||
|
|
"step": 1160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3002084781097984,
|
||
|
|
"grad_norm": 0.19171395897865295,
|
||
|
|
"learning_rate": 2.7945330759633642e-05,
|
||
|
|
"loss": 0.3337593078613281,
|
||
|
|
"step": 1170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3113273106323837,
|
||
|
|
"grad_norm": 0.1808820515871048,
|
||
|
|
"learning_rate": 2.770738532284897e-05,
|
||
|
|
"loss": 0.3352059364318848,
|
||
|
|
"step": 1180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3224461431549688,
|
||
|
|
"grad_norm": 0.19644634425640106,
|
||
|
|
"learning_rate": 2.746815167145303e-05,
|
||
|
|
"loss": 0.34058656692504885,
|
||
|
|
"step": 1190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3335649756775538,
|
||
|
|
"grad_norm": 0.20617882907390594,
|
||
|
|
"learning_rate": 2.7227669791027497e-05,
|
||
|
|
"loss": 0.33803424835205076,
|
||
|
|
"step": 1200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.344683808200139,
|
||
|
|
"grad_norm": 0.20170675218105316,
|
||
|
|
"learning_rate": 2.6985979875783388e-05,
|
||
|
|
"loss": 0.3345954418182373,
|
||
|
|
"step": 1210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3558026407227242,
|
||
|
|
"grad_norm": 0.20398379862308502,
|
||
|
|
"learning_rate": 2.6743122321843014e-05,
|
||
|
|
"loss": 0.3368945598602295,
|
||
|
|
"step": 1220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3669214732453092,
|
||
|
|
"grad_norm": 0.1875438690185547,
|
||
|
|
"learning_rate": 2.6499137720488163e-05,
|
||
|
|
"loss": 0.3394474983215332,
|
||
|
|
"step": 1230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3780403057678945,
|
||
|
|
"grad_norm": 0.19435080885887146,
|
||
|
|
"learning_rate": 2.625406685137564e-05,
|
||
|
|
"loss": 0.33933372497558595,
|
||
|
|
"step": 1240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.3891591382904795,
|
||
|
|
"grad_norm": 0.19872593879699707,
|
||
|
|
"learning_rate": 2.6007950675721373e-05,
|
||
|
|
"loss": 0.33671281337738035,
|
||
|
|
"step": 1250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4002779708130646,
|
||
|
|
"grad_norm": 0.19588671624660492,
|
||
|
|
"learning_rate": 2.5760830329454117e-05,
|
||
|
|
"loss": 0.3355069637298584,
|
||
|
|
"step": 1260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4113968033356499,
|
||
|
|
"grad_norm": 0.1908605545759201,
|
||
|
|
"learning_rate": 2.5512747116339985e-05,
|
||
|
|
"loss": 0.3366411209106445,
|
||
|
|
"step": 1270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.422515635858235,
|
||
|
|
"grad_norm": 0.1810505986213684,
|
||
|
|
"learning_rate": 2.5263742501078957e-05,
|
||
|
|
"loss": 0.34022998809814453,
|
||
|
|
"step": 1280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.43363446838082,
|
||
|
|
"grad_norm": 0.21036396920681,
|
||
|
|
"learning_rate": 2.501385810237442e-05,
|
||
|
|
"loss": 0.3365932941436768,
|
||
|
|
"step": 1290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4447533009034053,
|
||
|
|
"grad_norm": 0.20304331183433533,
|
||
|
|
"learning_rate": 2.476313568597702e-05,
|
||
|
|
"loss": 0.3349552392959595,
|
||
|
|
"step": 1300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4558721334259903,
|
||
|
|
"grad_norm": 0.17836874723434448,
|
||
|
|
"learning_rate": 2.4511617157703915e-05,
|
||
|
|
"loss": 0.32939877510070803,
|
||
|
|
"step": 1310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4669909659485754,
|
||
|
|
"grad_norm": 0.192827507853508,
|
||
|
|
"learning_rate": 2.4259344556434656e-05,
|
||
|
|
"loss": 0.33145139217376707,
|
||
|
|
"step": 1320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4781097984711606,
|
||
|
|
"grad_norm": 0.18946915864944458,
|
||
|
|
"learning_rate": 2.400636004708475e-05,
|
||
|
|
"loss": 0.3342320919036865,
|
||
|
|
"step": 1330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.4892286309937457,
|
||
|
|
"grad_norm": 0.18936337530612946,
|
||
|
|
"learning_rate": 2.3752705913558228e-05,
|
||
|
|
"loss": 0.3325347423553467,
|
||
|
|
"step": 1340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5003474635163307,
|
||
|
|
"grad_norm": 0.1900292932987213,
|
||
|
|
"learning_rate": 2.3498424551680318e-05,
|
||
|
|
"loss": 0.3336307525634766,
|
||
|
|
"step": 1350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.511466296038916,
|
||
|
|
"grad_norm": 0.20230746269226074,
|
||
|
|
"learning_rate": 2.3243558462111354e-05,
|
||
|
|
"loss": 0.33458542823791504,
|
||
|
|
"step": 1360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.522585128561501,
|
||
|
|
"grad_norm": 0.20732760429382324,
|
||
|
|
"learning_rate": 2.2988150243243235e-05,
|
||
|
|
"loss": 0.3298256158828735,
|
||
|
|
"step": 1370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5337039610840861,
|
||
|
|
"grad_norm": 0.19748058915138245,
|
||
|
|
"learning_rate": 2.273224258407951e-05,
|
||
|
|
"loss": 0.3372241973876953,
|
||
|
|
"step": 1380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5448227936066714,
|
||
|
|
"grad_norm": 0.19819419085979462,
|
||
|
|
"learning_rate": 2.2475878257100333e-05,
|
||
|
|
"loss": 0.3346505641937256,
|
||
|
|
"step": 1390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5559416261292565,
|
||
|
|
"grad_norm": 0.18096713721752167,
|
||
|
|
"learning_rate": 2.2219100111113408e-05,
|
||
|
|
"loss": 0.33000621795654295,
|
||
|
|
"step": 1400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5670604586518415,
|
||
|
|
"grad_norm": 0.17553280293941498,
|
||
|
|
"learning_rate": 2.196195106409232e-05,
|
||
|
|
"loss": 0.32919626235961913,
|
||
|
|
"step": 1410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5781792911744268,
|
||
|
|
"grad_norm": 0.18030349910259247,
|
||
|
|
"learning_rate": 2.1704474096003135e-05,
|
||
|
|
"loss": 0.3363958835601807,
|
||
|
|
"step": 1420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.5892981236970118,
|
||
|
|
"grad_norm": 0.17756561934947968,
|
||
|
|
"learning_rate": 2.1446712241620734e-05,
|
||
|
|
"loss": 0.33312478065490725,
|
||
|
|
"step": 1430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.600416956219597,
|
||
|
|
"grad_norm": 0.17333616316318512,
|
||
|
|
"learning_rate": 2.118870858333599e-05,
|
||
|
|
"loss": 0.327138090133667,
|
||
|
|
"step": 1440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6115357887421822,
|
||
|
|
"grad_norm": 0.18877775967121124,
|
||
|
|
"learning_rate": 2.093050624395494e-05,
|
||
|
|
"loss": 0.334153938293457,
|
||
|
|
"step": 1450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6226546212647672,
|
||
|
|
"grad_norm": 0.2033713012933731,
|
||
|
|
"learning_rate": 2.0672148379491234e-05,
|
||
|
|
"loss": 0.32985107898712157,
|
||
|
|
"step": 1460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6337734537873523,
|
||
|
|
"grad_norm": 0.18154199421405792,
|
||
|
|
"learning_rate": 2.0413678171953056e-05,
|
||
|
|
"loss": 0.3321858882904053,
|
||
|
|
"step": 1470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6448922863099376,
|
||
|
|
"grad_norm": 0.1664343774318695,
|
||
|
|
"learning_rate": 2.0155138822125608e-05,
|
||
|
|
"loss": 0.3292757511138916,
|
||
|
|
"step": 1480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6560111188325226,
|
||
|
|
"grad_norm": 0.17724835872650146,
|
||
|
|
"learning_rate": 1.9896573542350576e-05,
|
||
|
|
"loss": 0.3216125011444092,
|
||
|
|
"step": 1490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.6671299513551077,
|
||
|
|
"grad_norm": 0.1763281524181366,
|
||
|
|
"learning_rate": 1.9638025549303576e-05,
|
||
|
|
"loss": 0.33061861991882324,
|
||
|
|
"step": 1500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.678248783877693,
|
||
|
|
"grad_norm": 0.1832234263420105,
|
||
|
|
"learning_rate": 1.9379538056770927e-05,
|
||
|
|
"loss": 0.33137152194976804,
|
||
|
|
"step": 1510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.689367616400278,
|
||
|
|
"grad_norm": 0.18359725177288055,
|
||
|
|
"learning_rate": 1.912115426842686e-05,
|
||
|
|
"loss": 0.33290562629699705,
|
||
|
|
"step": 1520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.700486448922863,
|
||
|
|
"grad_norm": 0.18772786855697632,
|
||
|
|
"learning_rate": 1.8862917370612454e-05,
|
||
|
|
"loss": 0.3290142059326172,
|
||
|
|
"step": 1530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7116052814454483,
|
||
|
|
"grad_norm": 0.178235724568367,
|
||
|
|
"learning_rate": 1.8604870525117496e-05,
|
||
|
|
"loss": 0.3255646228790283,
|
||
|
|
"step": 1540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7227241139680334,
|
||
|
|
"grad_norm": 0.17328216135501862,
|
||
|
|
"learning_rate": 1.8347056861966333e-05,
|
||
|
|
"loss": 0.3327143907546997,
|
||
|
|
"step": 1550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7338429464906184,
|
||
|
|
"grad_norm": 0.17393898963928223,
|
||
|
|
"learning_rate": 1.8089519472209168e-05,
|
||
|
|
"loss": 0.3347191572189331,
|
||
|
|
"step": 1560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7449617790132037,
|
||
|
|
"grad_norm": 0.1645958125591278,
|
||
|
|
"learning_rate": 1.7832301400719793e-05,
|
||
|
|
"loss": 0.326206111907959,
|
||
|
|
"step": 1570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7560806115357888,
|
||
|
|
"grad_norm": 0.1829153597354889,
|
||
|
|
"learning_rate": 1.7575445639001026e-05,
|
||
|
|
"loss": 0.3275812387466431,
|
||
|
|
"step": 1580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7671994440583738,
|
||
|
|
"grad_norm": 0.1847892552614212,
|
||
|
|
"learning_rate": 1.7318995117999158e-05,
|
||
|
|
"loss": 0.32840893268585203,
|
||
|
|
"step": 1590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.778318276580959,
|
||
|
|
"grad_norm": 0.1821216493844986,
|
||
|
|
"learning_rate": 1.706299270092842e-05,
|
||
|
|
"loss": 0.3295578956604004,
|
||
|
|
"step": 1600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.7894371091035441,
|
||
|
|
"grad_norm": 0.18880750238895416,
|
||
|
|
"learning_rate": 1.6807481176106816e-05,
|
||
|
|
"loss": 0.3292530536651611,
|
||
|
|
"step": 1610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8005559416261292,
|
||
|
|
"grad_norm": 0.16780254244804382,
|
||
|
|
"learning_rate": 1.655250324980447e-05,
|
||
|
|
"loss": 0.32708158493041994,
|
||
|
|
"step": 1620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8116747741487145,
|
||
|
|
"grad_norm": 0.1843137890100479,
|
||
|
|
"learning_rate": 1.6298101539105712e-05,
|
||
|
|
"loss": 0.3281073093414307,
|
||
|
|
"step": 1630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8227936066712995,
|
||
|
|
"grad_norm": 0.1689286231994629,
|
||
|
|
"learning_rate": 1.604431856478602e-05,
|
||
|
|
"loss": 0.32400391101837156,
|
||
|
|
"step": 1640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8339124391938846,
|
||
|
|
"grad_norm": 0.18957920372486115,
|
||
|
|
"learning_rate": 1.5791196744205094e-05,
|
||
|
|
"loss": 0.3259273052215576,
|
||
|
|
"step": 1650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8450312717164699,
|
||
|
|
"grad_norm": 0.17294242978096008,
|
||
|
|
"learning_rate": 1.5538778384217215e-05,
|
||
|
|
"loss": 0.3238994598388672,
|
||
|
|
"step": 1660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.856150104239055,
|
||
|
|
"grad_norm": 0.17969508469104767,
|
||
|
|
"learning_rate": 1.5287105674100053e-05,
|
||
|
|
"loss": 0.32615640163421633,
|
||
|
|
"step": 1670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.86726893676164,
|
||
|
|
"grad_norm": 0.17494842410087585,
|
||
|
|
"learning_rate": 1.5036220678503137e-05,
|
||
|
|
"loss": 0.3246027946472168,
|
||
|
|
"step": 1680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8783877692842252,
|
||
|
|
"grad_norm": 0.17964747548103333,
|
||
|
|
"learning_rate": 1.4786165330417173e-05,
|
||
|
|
"loss": 0.325272798538208,
|
||
|
|
"step": 1690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.8895066018068103,
|
||
|
|
"grad_norm": 0.1734607219696045,
|
||
|
|
"learning_rate": 1.4536981424165334e-05,
|
||
|
|
"loss": 0.32622013092041013,
|
||
|
|
"step": 1700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9006254343293953,
|
||
|
|
"grad_norm": 0.19146278500556946,
|
||
|
|
"learning_rate": 1.4288710608417754e-05,
|
||
|
|
"loss": 0.32583372592926024,
|
||
|
|
"step": 1710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9117442668519806,
|
||
|
|
"grad_norm": 0.1902935951948166,
|
||
|
|
"learning_rate": 1.404139437923036e-05,
|
||
|
|
"loss": 0.32550692558288574,
|
||
|
|
"step": 1720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9228630993745657,
|
||
|
|
"grad_norm": 0.16681939363479614,
|
||
|
|
"learning_rate": 1.3795074073109211e-05,
|
||
|
|
"loss": 0.3274375438690186,
|
||
|
|
"step": 1730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9339819318971507,
|
||
|
|
"grad_norm": 0.17112983763217926,
|
||
|
|
"learning_rate": 1.3549790860101481e-05,
|
||
|
|
"loss": 0.3290217399597168,
|
||
|
|
"step": 1740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.945100764419736,
|
||
|
|
"grad_norm": 0.16517910361289978,
|
||
|
|
"learning_rate": 1.3305585736914318e-05,
|
||
|
|
"loss": 0.3270266056060791,
|
||
|
|
"step": 1750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.956219596942321,
|
||
|
|
"grad_norm": 0.20247752964496613,
|
||
|
|
"learning_rate": 1.3062499520062608e-05,
|
||
|
|
"loss": 0.32512893676757815,
|
||
|
|
"step": 1760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9673384294649061,
|
||
|
|
"grad_norm": 0.1725420504808426,
|
||
|
|
"learning_rate": 1.2820572839046915e-05,
|
||
|
|
"loss": 0.32194349765777586,
|
||
|
|
"step": 1770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9784572619874914,
|
||
|
|
"grad_norm": 0.19890804588794708,
|
||
|
|
"learning_rate": 1.2579846129562663e-05,
|
||
|
|
"loss": 0.32468571662902834,
|
||
|
|
"step": 1780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.9895760945100764,
|
||
|
|
"grad_norm": 0.17223462462425232,
|
||
|
|
"learning_rate": 1.2340359626741676e-05,
|
||
|
|
"loss": 0.31974453926086427,
|
||
|
|
"step": 1790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0,
|
||
|
|
"grad_norm": 0.23255887627601624,
|
||
|
|
"learning_rate": 1.2102153358427264e-05,
|
||
|
|
"loss": 0.32196643352508547,
|
||
|
|
"step": 1800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0111188325225853,
|
||
|
|
"grad_norm": 0.2428450584411621,
|
||
|
|
"learning_rate": 1.1865267138484e-05,
|
||
|
|
"loss": 0.2720228672027588,
|
||
|
|
"step": 1810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.02223766504517,
|
||
|
|
"grad_norm": 0.19334714114665985,
|
||
|
|
"learning_rate": 1.1629740560143162e-05,
|
||
|
|
"loss": 0.2708899974822998,
|
||
|
|
"step": 1820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0333564975677554,
|
||
|
|
"grad_norm": 0.18385189771652222,
|
||
|
|
"learning_rate": 1.139561298938515e-05,
|
||
|
|
"loss": 0.2705179214477539,
|
||
|
|
"step": 1830
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0444753300903407,
|
||
|
|
"grad_norm": 0.1807754784822464,
|
||
|
|
"learning_rate": 1.1162923558359849e-05,
|
||
|
|
"loss": 0.2721697807312012,
|
||
|
|
"step": 1840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0555941626129255,
|
||
|
|
"grad_norm": 0.18753038346767426,
|
||
|
|
"learning_rate": 1.0931711158846024e-05,
|
||
|
|
"loss": 0.2725118398666382,
|
||
|
|
"step": 1850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.0667129951355108,
|
||
|
|
"grad_norm": 0.17677700519561768,
|
||
|
|
"learning_rate": 1.0702014435750985e-05,
|
||
|
|
"loss": 0.27192416191101076,
|
||
|
|
"step": 1860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.077831827658096,
|
||
|
|
"grad_norm": 0.1743370145559311,
|
||
|
|
"learning_rate": 1.0473871780651435e-05,
|
||
|
|
"loss": 0.27294752597808836,
|
||
|
|
"step": 1870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.088950660180681,
|
||
|
|
"grad_norm": 0.1780771017074585,
|
||
|
|
"learning_rate": 1.0247321325376704e-05,
|
||
|
|
"loss": 0.2742859601974487,
|
||
|
|
"step": 1880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.100069492703266,
|
||
|
|
"grad_norm": 0.17365196347236633,
|
||
|
|
"learning_rate": 1.00224009356354e-05,
|
||
|
|
"loss": 0.2734071254730225,
|
||
|
|
"step": 1890
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1111883252258514,
|
||
|
|
"grad_norm": 0.1772630214691162,
|
||
|
|
"learning_rate": 9.799148204686495e-06,
|
||
|
|
"loss": 0.27077765464782716,
|
||
|
|
"step": 1900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1223071577484363,
|
||
|
|
"grad_norm": 0.17199768126010895,
|
||
|
|
"learning_rate": 9.577600447055983e-06,
|
||
|
|
"loss": 0.2729313373565674,
|
||
|
|
"step": 1910
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1334259902710215,
|
||
|
|
"grad_norm": 0.17337964475154877,
|
||
|
|
"learning_rate": 9.357794692300134e-06,
|
||
|
|
"loss": 0.27156963348388674,
|
||
|
|
"step": 1920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.144544822793607,
|
||
|
|
"grad_norm": 0.1897203028202057,
|
||
|
|
"learning_rate": 9.13976767881634e-06,
|
||
|
|
"loss": 0.27226369380950927,
|
||
|
|
"step": 1930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.1556636553161916,
|
||
|
|
"grad_norm": 0.17273284494876862,
|
||
|
|
"learning_rate": 8.923555847702675e-06,
|
||
|
|
"loss": 0.2755557060241699,
|
||
|
|
"step": 1940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.166782487838777,
|
||
|
|
"grad_norm": 0.1792151778936386,
|
||
|
|
"learning_rate": 8.709195336667102e-06,
|
||
|
|
"loss": 0.2707196235656738,
|
||
|
|
"step": 1950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.177901320361362,
|
||
|
|
"grad_norm": 0.1746506690979004,
|
||
|
|
"learning_rate": 8.496721973987423e-06,
|
||
|
|
"loss": 0.271243953704834,
|
||
|
|
"step": 1960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.189020152883947,
|
||
|
|
"grad_norm": 0.1751469522714615,
|
||
|
|
"learning_rate": 8.286171272522904e-06,
|
||
|
|
"loss": 0.2702665090560913,
|
||
|
|
"step": 1970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2001389854065323,
|
||
|
|
"grad_norm": 0.17558525502681732,
|
||
|
|
"learning_rate": 8.077578423778658e-06,
|
||
|
|
"loss": 0.2705970764160156,
|
||
|
|
"step": 1980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2112578179291176,
|
||
|
|
"grad_norm": 0.17805011570453644,
|
||
|
|
"learning_rate": 7.870978292023739e-06,
|
||
|
|
"loss": 0.275134539604187,
|
||
|
|
"step": 1990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2223766504517024,
|
||
|
|
"grad_norm": 0.178203746676445,
|
||
|
|
"learning_rate": 7.666405408463889e-06,
|
||
|
|
"loss": 0.27126991748809814,
|
||
|
|
"step": 2000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2334954829742877,
|
||
|
|
"grad_norm": 0.17006462812423706,
|
||
|
|
"learning_rate": 7.4638939654700235e-06,
|
||
|
|
"loss": 0.26952409744262695,
|
||
|
|
"step": 2010
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.244614315496873,
|
||
|
|
"grad_norm": 0.1694515347480774,
|
||
|
|
"learning_rate": 7.263477810863282e-06,
|
||
|
|
"loss": 0.27427287101745607,
|
||
|
|
"step": 2020
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.255733148019458,
|
||
|
|
"grad_norm": 0.19160398840904236,
|
||
|
|
"learning_rate": 7.065190442257686e-06,
|
||
|
|
"loss": 0.27138872146606446,
|
||
|
|
"step": 2030
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.266851980542043,
|
||
|
|
"grad_norm": 0.16741624474525452,
|
||
|
|
"learning_rate": 6.8690650014613505e-06,
|
||
|
|
"loss": 0.2695302486419678,
|
||
|
|
"step": 2040
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.2779708130646283,
|
||
|
|
"grad_norm": 0.16422736644744873,
|
||
|
|
"learning_rate": 6.675134268937158e-06,
|
||
|
|
"loss": 0.2692440032958984,
|
||
|
|
"step": 2050
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.289089645587213,
|
||
|
|
"grad_norm": 0.1828807145357132,
|
||
|
|
"learning_rate": 6.483430658323806e-06,
|
||
|
|
"loss": 0.26996283531188964,
|
||
|
|
"step": 2060
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3002084781097984,
|
||
|
|
"grad_norm": 0.17340506613254547,
|
||
|
|
"learning_rate": 6.293986211018208e-06,
|
||
|
|
"loss": 0.2742361783981323,
|
||
|
|
"step": 2070
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3113273106323837,
|
||
|
|
"grad_norm": 0.1660027652978897,
|
||
|
|
"learning_rate": 6.106832590820053e-06,
|
||
|
|
"loss": 0.2687552452087402,
|
||
|
|
"step": 2080
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3224461431549686,
|
||
|
|
"grad_norm": 0.17267292737960815,
|
||
|
|
"learning_rate": 5.922001078639541e-06,
|
||
|
|
"loss": 0.273982572555542,
|
||
|
|
"step": 2090
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.333564975677554,
|
||
|
|
"grad_norm": 0.16563960909843445,
|
||
|
|
"learning_rate": 5.739522567269052e-06,
|
||
|
|
"loss": 0.27311880588531495,
|
||
|
|
"step": 2100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.344683808200139,
|
||
|
|
"grad_norm": 0.16006183624267578,
|
||
|
|
"learning_rate": 5.559427556219734e-06,
|
||
|
|
"loss": 0.273662805557251,
|
||
|
|
"step": 2110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.355802640722724,
|
||
|
|
"grad_norm": 0.16579481959342957,
|
||
|
|
"learning_rate": 5.381746146623805e-06,
|
||
|
|
"loss": 0.269588041305542,
|
||
|
|
"step": 2120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.366921473245309,
|
||
|
|
"grad_norm": 0.16734926402568817,
|
||
|
|
"learning_rate": 5.20650803620343e-06,
|
||
|
|
"loss": 0.2678502321243286,
|
||
|
|
"step": 2130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3780403057678945,
|
||
|
|
"grad_norm": 0.16894319653511047,
|
||
|
|
"learning_rate": 5.033742514307061e-06,
|
||
|
|
"loss": 0.273479700088501,
|
||
|
|
"step": 2140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.3891591382904793,
|
||
|
|
"grad_norm": 0.16338688135147095,
|
||
|
|
"learning_rate": 4.863478457013977e-06,
|
||
|
|
"loss": 0.27426838874816895,
|
||
|
|
"step": 2150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4002779708130646,
|
||
|
|
"grad_norm": 0.16434775292873383,
|
||
|
|
"learning_rate": 4.6957443223079425e-06,
|
||
|
|
"loss": 0.27318222522735597,
|
||
|
|
"step": 2160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.41139680333565,
|
||
|
|
"grad_norm": 0.16170066595077515,
|
||
|
|
"learning_rate": 4.530568145320724e-06,
|
||
|
|
"loss": 0.2738449811935425,
|
||
|
|
"step": 2170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4225156358582347,
|
||
|
|
"grad_norm": 0.16669511795043945,
|
||
|
|
"learning_rate": 4.367977533646297e-06,
|
||
|
|
"loss": 0.26780765056610106,
|
||
|
|
"step": 2180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.43363446838082,
|
||
|
|
"grad_norm": 0.15889227390289307,
|
||
|
|
"learning_rate": 4.207999662726516e-06,
|
||
|
|
"loss": 0.2710136413574219,
|
||
|
|
"step": 2190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4447533009034053,
|
||
|
|
"grad_norm": 0.15884160995483398,
|
||
|
|
"learning_rate": 4.050661271308969e-06,
|
||
|
|
"loss": 0.27028565406799315,
|
||
|
|
"step": 2200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.45587213342599,
|
||
|
|
"grad_norm": 0.16362424194812775,
|
||
|
|
"learning_rate": 3.895988656977898e-06,
|
||
|
|
"loss": 0.2697636604309082,
|
||
|
|
"step": 2210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4669909659485754,
|
||
|
|
"grad_norm": 0.160287007689476,
|
||
|
|
"learning_rate": 3.744007671758778e-06,
|
||
|
|
"loss": 0.26787629127502444,
|
||
|
|
"step": 2220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4781097984711606,
|
||
|
|
"grad_norm": 0.15940804779529572,
|
||
|
|
"learning_rate": 3.59474371779742e-06,
|
||
|
|
"loss": 0.2722454071044922,
|
||
|
|
"step": 2230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.4892286309937455,
|
||
|
|
"grad_norm": 0.17461982369422913,
|
||
|
|
"learning_rate": 3.4482217431142394e-06,
|
||
|
|
"loss": 0.27159295082092283,
|
||
|
|
"step": 2240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5003474635163307,
|
||
|
|
"grad_norm": 0.1559775024652481,
|
||
|
|
"learning_rate": 3.304466237434458e-06,
|
||
|
|
"loss": 0.26755647659301757,
|
||
|
|
"step": 2250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.511466296038916,
|
||
|
|
"grad_norm": 0.1622040718793869,
|
||
|
|
"learning_rate": 3.1635012280948496e-06,
|
||
|
|
"loss": 0.2669699668884277,
|
||
|
|
"step": 2260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5225851285615013,
|
||
|
|
"grad_norm": 0.16182290017604828,
|
||
|
|
"learning_rate": 3.0253502760278406e-06,
|
||
|
|
"loss": 0.2691537380218506,
|
||
|
|
"step": 2270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.533703961084086,
|
||
|
|
"grad_norm": 0.15870854258537292,
|
||
|
|
"learning_rate": 2.8900364718234987e-06,
|
||
|
|
"loss": 0.2669252872467041,
|
||
|
|
"step": 2280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5448227936066714,
|
||
|
|
"grad_norm": 0.17056338489055634,
|
||
|
|
"learning_rate": 2.7575824318701806e-06,
|
||
|
|
"loss": 0.26923959255218505,
|
||
|
|
"step": 2290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5559416261292567,
|
||
|
|
"grad_norm": 0.16084995865821838,
|
||
|
|
"learning_rate": 2.6280102945744124e-06,
|
||
|
|
"loss": 0.2703924417495728,
|
||
|
|
"step": 2300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.5670604586518415,
|
||
|
|
"grad_norm": 0.1600581705570221,
|
||
|
|
"learning_rate": 2.501341716660699e-06,
|
||
|
|
"loss": 0.2688326358795166,
|
||
|
|
"step": 2310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.578179291174427,
|
||
|
|
"grad_norm": 0.15505236387252808,
|
||
|
|
"learning_rate": 2.377597869551762e-06,
|
||
|
|
"loss": 0.2724630832672119,
|
||
|
|
"step": 2320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.589298123697012,
|
||
|
|
"grad_norm": 0.1598374992609024,
|
||
|
|
"learning_rate": 2.2567994358299973e-06,
|
||
|
|
"loss": 0.27178168296813965,
|
||
|
|
"step": 2330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.600416956219597,
|
||
|
|
"grad_norm": 0.161187544465065,
|
||
|
|
"learning_rate": 2.138966605780537e-06,
|
||
|
|
"loss": 0.2691415548324585,
|
||
|
|
"step": 2340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.611535788742182,
|
||
|
|
"grad_norm": 0.16358298063278198,
|
||
|
|
"learning_rate": 2.024119074016664e-06,
|
||
|
|
"loss": 0.26769893169403075,
|
||
|
|
"step": 2350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6226546212647674,
|
||
|
|
"grad_norm": 0.159417986869812,
|
||
|
|
"learning_rate": 1.9122760361880364e-06,
|
||
|
|
"loss": 0.2699122428894043,
|
||
|
|
"step": 2360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6337734537873523,
|
||
|
|
"grad_norm": 0.15539702773094177,
|
||
|
|
"learning_rate": 1.8034561857723453e-06,
|
||
|
|
"loss": 0.2678532123565674,
|
||
|
|
"step": 2370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6448922863099376,
|
||
|
|
"grad_norm": 0.15744274854660034,
|
||
|
|
"learning_rate": 1.6976777109508446e-06,
|
||
|
|
"loss": 0.2715311050415039,
|
||
|
|
"step": 2380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.656011118832523,
|
||
|
|
"grad_norm": 0.15635013580322266,
|
||
|
|
"learning_rate": 1.5949582915684025e-06,
|
||
|
|
"loss": 0.26792240142822266,
|
||
|
|
"step": 2390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.6671299513551077,
|
||
|
|
"grad_norm": 0.1590019315481186,
|
||
|
|
"learning_rate": 1.4953150961784713e-06,
|
||
|
|
"loss": 0.2684680461883545,
|
||
|
|
"step": 2400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.678248783877693,
|
||
|
|
"grad_norm": 0.1583629995584488,
|
||
|
|
"learning_rate": 1.398764779173538e-06,
|
||
|
|
"loss": 0.2693314552307129,
|
||
|
|
"step": 2410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.689367616400278,
|
||
|
|
"grad_norm": 0.15858042240142822,
|
||
|
|
"learning_rate": 1.3053234780015012e-06,
|
||
|
|
"loss": 0.27136645317077634,
|
||
|
|
"step": 2420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.700486448922863,
|
||
|
|
"grad_norm": 0.16220812499523163,
|
||
|
|
"learning_rate": 1.2150068104684577e-06,
|
||
|
|
"loss": 0.2695932149887085,
|
||
|
|
"step": 2430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7116052814454483,
|
||
|
|
"grad_norm": 0.15654098987579346,
|
||
|
|
"learning_rate": 1.12782987212833e-06,
|
||
|
|
"loss": 0.2679957151412964,
|
||
|
|
"step": 2440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7227241139680336,
|
||
|
|
"grad_norm": 0.21130171418190002,
|
||
|
|
"learning_rate": 1.0438072337597972e-06,
|
||
|
|
"loss": 0.2688181400299072,
|
||
|
|
"step": 2450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7338429464906184,
|
||
|
|
"grad_norm": 0.1545591801404953,
|
||
|
|
"learning_rate": 9.6295293893093e-07,
|
||
|
|
"loss": 0.2718325138092041,
|
||
|
|
"step": 2460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7449617790132037,
|
||
|
|
"grad_norm": 0.15754657983779907,
|
||
|
|
"learning_rate": 8.852805016519417e-07,
|
||
|
|
"loss": 0.26870386600494384,
|
||
|
|
"step": 2470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.756080611535789,
|
||
|
|
"grad_norm": 0.1659633219242096,
|
||
|
|
"learning_rate": 8.108029041164566e-07,
|
||
|
|
"loss": 0.2723594903945923,
|
||
|
|
"step": 2480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.767199444058374,
|
||
|
|
"grad_norm": 0.15784206986427307,
|
||
|
|
"learning_rate": 7.395325945316623e-07,
|
||
|
|
"loss": 0.26740460395812987,
|
||
|
|
"step": 2490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.778318276580959,
|
||
|
|
"grad_norm": 0.1567256897687912,
|
||
|
|
"learning_rate": 6.714814850377082e-07,
|
||
|
|
"loss": 0.26805920600891114,
|
||
|
|
"step": 2500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.7894371091035444,
|
||
|
|
"grad_norm": 0.1606101393699646,
|
||
|
|
"learning_rate": 6.066609497167086e-07,
|
||
|
|
"loss": 0.26962528228759763,
|
||
|
|
"step": 2510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.800555941626129,
|
||
|
|
"grad_norm": 0.17048372328281403,
|
||
|
|
"learning_rate": 5.450818226916799e-07,
|
||
|
|
"loss": 0.26596574783325194,
|
||
|
|
"step": 2520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8116747741487145,
|
||
|
|
"grad_norm": 0.1672402024269104,
|
||
|
|
"learning_rate": 4.867543963157162e-07,
|
||
|
|
"loss": 0.27477524280548093,
|
||
|
|
"step": 2530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8227936066712997,
|
||
|
|
"grad_norm": 0.15722811222076416,
|
||
|
|
"learning_rate": 4.3168841945172347e-07,
|
||
|
|
"loss": 0.26975407600402834,
|
||
|
|
"step": 2540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8339124391938846,
|
||
|
|
"grad_norm": 0.15568679571151733,
|
||
|
|
"learning_rate": 3.798930958430025e-07,
|
||
|
|
"loss": 0.2677044630050659,
|
||
|
|
"step": 2550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.84503127171647,
|
||
|
|
"grad_norm": 0.15909866988658905,
|
||
|
|
"learning_rate": 3.3137708257491074e-07,
|
||
|
|
"loss": 0.2706420421600342,
|
||
|
|
"step": 2560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.856150104239055,
|
||
|
|
"grad_norm": 0.15334129333496094,
|
||
|
|
"learning_rate": 2.861484886279331e-07,
|
||
|
|
"loss": 0.26921744346618653,
|
||
|
|
"step": 2570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.86726893676164,
|
||
|
|
"grad_norm": 0.1526278257369995,
|
||
|
|
"learning_rate": 2.4421487352234376e-07,
|
||
|
|
"loss": 0.2678064823150635,
|
||
|
|
"step": 2580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8783877692842252,
|
||
|
|
"grad_norm": 0.1667514145374298,
|
||
|
|
"learning_rate": 2.0558324605469248e-07,
|
||
|
|
"loss": 0.26693019866943357,
|
||
|
|
"step": 2590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.8895066018068105,
|
||
|
|
"grad_norm": 0.15330937504768372,
|
||
|
|
"learning_rate": 1.7026006312635956e-07,
|
||
|
|
"loss": 0.266402268409729,
|
||
|
|
"step": 2600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9006254343293953,
|
||
|
|
"grad_norm": 0.1585593819618225,
|
||
|
|
"learning_rate": 1.3825122866435893e-07,
|
||
|
|
"loss": 0.2690951585769653,
|
||
|
|
"step": 2610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9117442668519806,
|
||
|
|
"grad_norm": 0.15491652488708496,
|
||
|
|
"learning_rate": 1.0956209263453421e-07,
|
||
|
|
"loss": 0.26980152130126955,
|
||
|
|
"step": 2620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.922863099374566,
|
||
|
|
"grad_norm": 0.1553027331829071,
|
||
|
|
"learning_rate": 8.419745014737412e-08,
|
||
|
|
"loss": 0.26876671314239503,
|
||
|
|
"step": 2630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9339819318971507,
|
||
|
|
"grad_norm": 0.15838363766670227,
|
||
|
|
"learning_rate": 6.216154065656233e-08,
|
||
|
|
"loss": 0.2718991279602051,
|
||
|
|
"step": 2640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.945100764419736,
|
||
|
|
"grad_norm": 0.1559874266386032,
|
||
|
|
"learning_rate": 4.345804725037983e-08,
|
||
|
|
"loss": 0.2709752321243286,
|
||
|
|
"step": 2650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9562195969423213,
|
||
|
|
"grad_norm": 0.15690867602825165,
|
||
|
|
"learning_rate": 2.8090096036119675e-08,
|
||
|
|
"loss": 0.26477723121643065,
|
||
|
|
"step": 2660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.967338429464906,
|
||
|
|
"grad_norm": 0.15188027918338776,
|
||
|
|
"learning_rate": 1.6060255617595943e-08,
|
||
|
|
"loss": 0.27164206504821775,
|
||
|
|
"step": 2670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9784572619874914,
|
||
|
|
"grad_norm": 0.15537431836128235,
|
||
|
|
"learning_rate": 7.370536665811667e-09,
|
||
|
|
"loss": 0.2670041561126709,
|
||
|
|
"step": 2680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 2.9895760945100767,
|
||
|
|
"grad_norm": 0.15168948471546173,
|
||
|
|
"learning_rate": 2.0223915829031828e-09,
|
||
|
|
"loss": 0.27080345153808594,
|
||
|
|
"step": 2690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0,
|
||
|
|
"grad_norm": 0.24983854591846466,
|
||
|
|
"learning_rate": 1.6714259387651256e-11,
|
||
|
|
"loss": 0.2644808292388916,
|
||
|
|
"step": 2700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 3.0,
|
||
|
|
"step": 2700,
|
||
|
|
"total_flos": 2.5566761492312123e+20,
|
||
|
|
"train_loss": 0.34524431255128646,
|
||
|
|
"train_runtime": 116173.3714,
|
||
|
|
"train_samples_per_second": 2.973,
|
||
|
|
"train_steps_per_second": 0.023
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 10,
|
||
|
|
"max_steps": 2700,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 3,
|
||
|
|
"save_steps": 500,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 2.5566761492312123e+20,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|